diff --git a/Documentation/gpu/i915.rst b/Documentation/gpu/i915.rst
index b7d801993bfa17196659cd59aed1e727c4aa1083..bcaefc952764effc93e7b74bb72c41f7335c09c7 100644
--- a/Documentation/gpu/i915.rst
+++ b/Documentation/gpu/i915.rst
@@ -539,6 +539,7 @@ GuC ABI
 .. kernel-doc:: drivers/gpu/drm/i915/gt/uc/abi/guc_communication_mmio_abi.h
 .. kernel-doc:: drivers/gpu/drm/i915/gt/uc/abi/guc_communication_ctb_abi.h
 .. kernel-doc:: drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h
+.. kernel-doc:: drivers/gpu/drm/i915/gt/uc/abi/guc_klvs_abi.h
 
 HuC
 ---
diff --git a/drivers/gpu/drm/dp/drm_dp.c b/drivers/gpu/drm/dp/drm_dp.c
index 6d43325acca560c85db5b5ad518cd462cf84d3b8..a20b0f8f24b87fe1a84d058a6937e851464bba6b 100644
--- a/drivers/gpu/drm/dp/drm_dp.c
+++ b/drivers/gpu/drm/dp/drm_dp.c
@@ -144,6 +144,69 @@ u8 drm_dp_get_adjust_tx_ffe_preset(const u8 link_status[DP_LINK_STATUS_SIZE],
 }
 EXPORT_SYMBOL(drm_dp_get_adjust_tx_ffe_preset);
 
+/* DP 2.0 errata for 128b/132b */
+bool drm_dp_128b132b_lane_channel_eq_done(const u8 link_status[DP_LINK_STATUS_SIZE],
+					  int lane_count)
+{
+	u8 lane_align, lane_status;
+	int lane;
+
+	lane_align = dp_link_status(link_status, DP_LANE_ALIGN_STATUS_UPDATED);
+	if (!(lane_align & DP_INTERLANE_ALIGN_DONE))
+		return false;
+
+	for (lane = 0; lane < lane_count; lane++) {
+		lane_status = dp_get_lane_status(link_status, lane);
+		if (!(lane_status & DP_LANE_CHANNEL_EQ_DONE))
+			return false;
+	}
+	return true;
+}
+EXPORT_SYMBOL(drm_dp_128b132b_lane_channel_eq_done);
+
+/* DP 2.0 errata for 128b/132b */
+bool drm_dp_128b132b_lane_symbol_locked(const u8 link_status[DP_LINK_STATUS_SIZE],
+					int lane_count)
+{
+	u8 lane_status;
+	int lane;
+
+	for (lane = 0; lane < lane_count; lane++) {
+		lane_status = dp_get_lane_status(link_status, lane);
+		if (!(lane_status & DP_LANE_SYMBOL_LOCKED))
+			return false;
+	}
+	return true;
+}
+EXPORT_SYMBOL(drm_dp_128b132b_lane_symbol_locked);
+
+/* DP 2.0 errata for 128b/132b */
+bool drm_dp_128b132b_eq_interlane_align_done(const u8 link_status[DP_LINK_STATUS_SIZE])
+{
+	u8 status = dp_link_status(link_status, DP_LANE_ALIGN_STATUS_UPDATED);
+
+	return status & DP_128B132B_DPRX_EQ_INTERLANE_ALIGN_DONE;
+}
+EXPORT_SYMBOL(drm_dp_128b132b_eq_interlane_align_done);
+
+/* DP 2.0 errata for 128b/132b */
+bool drm_dp_128b132b_cds_interlane_align_done(const u8 link_status[DP_LINK_STATUS_SIZE])
+{
+	u8 status = dp_link_status(link_status, DP_LANE_ALIGN_STATUS_UPDATED);
+
+	return status & DP_128B132B_DPRX_CDS_INTERLANE_ALIGN_DONE;
+}
+EXPORT_SYMBOL(drm_dp_128b132b_cds_interlane_align_done);
+
+/* DP 2.0 errata for 128b/132b */
+bool drm_dp_128b132b_link_training_failed(const u8 link_status[DP_LINK_STATUS_SIZE])
+{
+	u8 status = dp_link_status(link_status, DP_LANE_ALIGN_STATUS_UPDATED);
+
+	return status & DP_128B132B_LT_FAILED;
+}
+EXPORT_SYMBOL(drm_dp_128b132b_link_training_failed);
+
 u8 drm_dp_get_adjust_request_post_cursor(const u8 link_status[DP_LINK_STATUS_SIZE],
 					 unsigned int lane)
 {
@@ -281,6 +344,26 @@ int drm_dp_read_channel_eq_delay(struct drm_dp_aux *aux, const u8 dpcd[DP_RECEIV
 }
 EXPORT_SYMBOL(drm_dp_read_channel_eq_delay);
 
+/* Per DP 2.0 Errata */
+int drm_dp_128b132b_read_aux_rd_interval(struct drm_dp_aux *aux)
+{
+	int unit;
+	u8 val;
+
+	if (drm_dp_dpcd_readb(aux, DP_128B132B_TRAINING_AUX_RD_INTERVAL, &val) != 1) {
+		drm_err(aux->drm_dev, "%s: failed rd interval read\n",
+			aux->name);
+		/* default to max */
+		val = DP_128B132B_TRAINING_AUX_RD_INTERVAL_MASK;
+	}
+
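+	/* The interval value is in units of 1 or 2 ms; return microseconds. */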
+	unit = (val & DP_128B132B_TRAINING_AUX_RD_INTERVAL_1MS_UNIT) ? 1 : 2;
+	val &= DP_128B132B_TRAINING_AUX_RD_INTERVAL_MASK;
+
+	return (val + 1) * unit * 1000;
+}
+EXPORT_SYMBOL(drm_dp_128b132b_read_aux_rd_interval);
+
 void drm_dp_link_train_clock_recovery_delay(const struct drm_dp_aux *aux,
 					    const u8 dpcd[DP_RECEIVER_CAP_SIZE])
 {
diff --git a/drivers/gpu/drm/drm_cache.c b/drivers/gpu/drm/drm_cache.c
index 4bb093ccf1b8d4677bbf08e0329bc29957dd93e5..4b0da6baff78e7c38a08446a4d68e765c5595b5b 100644
--- a/drivers/gpu/drm/drm_cache.c
+++ b/drivers/gpu/drm/drm_cache.c
@@ -221,7 +221,7 @@ static void memcpy_fallback(struct iosys_map *dst,
 	if (!dst->is_iomem && !src->is_iomem) {
 		memcpy(dst->vaddr, src->vaddr, len);
 	} else if (!src->is_iomem) {
-		iosys_map_memcpy_to(dst, src->vaddr, len);
+		iosys_map_memcpy_to(dst, 0, src->vaddr, len);
 	} else if (!dst->is_iomem) {
 		memcpy_fromio(dst->vaddr, src->vaddr_iomem, len);
 	} else {
diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c
index e9a9d35fbf5e4faf6a006e56ea0746d6c99d0ebe..6f72627369f8c51281440d100e30bf6f266c1f78 100644
--- a/drivers/gpu/drm/drm_fb_helper.c
+++ b/drivers/gpu/drm/drm_fb_helper.c
@@ -385,7 +385,7 @@ static void drm_fb_helper_damage_blit_real(struct drm_fb_helper *fb_helper,
 	iosys_map_incr(dst, offset); /* go to first pixel within clip rect */
 
 	for (y = clip->y1; y < clip->y2; y++) {
-		iosys_map_memcpy_to(dst, src, len);
+		iosys_map_memcpy_to(dst, 0, src, len);
 		iosys_map_incr(dst, fb->pitches[0]);
 		src += fb->pitches[0];
 	}
diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 139e0e7eba94fcb11ce52ad6e25366616f0285ff..9d588d936e3dc3994cee0064ceb1777449fe431e 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -13,6 +13,7 @@
 # will most likely get a sudden build breakage... Hopefully we will fix
 # new warnings before CI updates!
 subdir-ccflags-y := -Wall -Wextra
+subdir-ccflags-y += -Wno-format-security
 subdir-ccflags-y += -Wno-unused-parameter
 subdir-ccflags-y += -Wno-type-limits
 subdir-ccflags-y += -Wno-missing-field-initializers
@@ -174,7 +175,7 @@ i915-y += \
 	  i915_trace_points.o \
 	  i915_ttm_buddy_manager.o \
 	  i915_vma.o \
-	  i915_vma_snapshot.o \
+	  i915_vma_resource.o \
 	  intel_wopcm.o
 
 # general-purpose microcontroller (GuC) support
@@ -197,6 +198,7 @@ i915-y += gt/uc/intel_uc.o \
 
 # modesetting core code
 i915-y += \
+	display/hsw_ips.o \
 	display/intel_atomic.o \
 	display/intel_atomic_plane.o \
 	display/intel_audio.o \
diff --git a/drivers/gpu/drm/i915/display/hsw_ips.c b/drivers/gpu/drm/i915/display/hsw_ips.c
new file mode 100644
index 0000000000000000000000000000000000000000..38014e0cc9ad5423054e5a274555d0904b0666b7
--- /dev/null
+++ b/drivers/gpu/drm/i915/display/hsw_ips.c
@@ -0,0 +1,271 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include "hsw_ips.h"
+#include "i915_drv.h"
+#include "i915_reg.h"
+#include "intel_de.h"
+#include "intel_display_types.h"
+#include "intel_pcode.h"
+
+static void hsw_ips_enable(const struct intel_crtc_state *crtc_state)
+{
+	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
+	struct drm_i915_private *i915 = to_i915(crtc->base.dev);
+
+	if (!crtc_state->ips_enabled)
+		return;
+
+	/*
+	 * We can only enable IPS after we enable a plane and wait for a vblank
+	 * This function is called from post_plane_update, which is run after
+	 * a vblank wait.
+	 */
+	drm_WARN_ON(&i915->drm,
+		    !(crtc_state->active_planes & ~BIT(PLANE_CURSOR)));
+
+	if (IS_BROADWELL(i915)) {
+		drm_WARN_ON(&i915->drm,
+			    snb_pcode_write(i915, DISPLAY_IPS_CONTROL,
+					    IPS_ENABLE | IPS_PCODE_CONTROL));
+		/*
+		 * Quoting Art Runyan: "it's not safe to expect any particular
+		 * value in IPS_CTL bit 31 after enabling IPS through the
+		 * mailbox." Moreover, the mailbox may return a bogus state,
+		 * so we need to just enable it and continue on.
+		 */
+	} else {
+		intel_de_write(i915, IPS_CTL, IPS_ENABLE);
+		/*
+		 * The bit only becomes 1 in the next vblank, so this wait here
+		 * is essentially intel_wait_for_vblank. If we don't have this
+		 * and don't wait for vblanks until the end of crtc_enable, then
+		 * the HW state readout code will complain that the expected
+		 * IPS_CTL value is not the one we read.
+		 */
+		if (intel_de_wait_for_set(i915, IPS_CTL, IPS_ENABLE, 50))
+			drm_err(&i915->drm,
+				"Timed out waiting for IPS enable\n");
+	}
+}
+
+bool hsw_ips_disable(const struct intel_crtc_state *crtc_state)
+{
+	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
+	struct drm_i915_private *i915 = to_i915(crtc->base.dev);
+	bool need_vblank_wait = false;
+
+	if (!crtc_state->ips_enabled)
+		return need_vblank_wait;
+
+	if (IS_BROADWELL(i915)) {
+		drm_WARN_ON(&i915->drm,
+			    snb_pcode_write(i915, DISPLAY_IPS_CONTROL, 0));
+		/*
+		 * Wait for PCODE to finish disabling IPS. The BSpec specified
+		 * 42ms timeout value leads to occasional timeouts so use 100ms
+		 * instead.
+		 */
+		if (intel_de_wait_for_clear(i915, IPS_CTL, IPS_ENABLE, 100))
+			drm_err(&i915->drm,
+				"Timed out waiting for IPS disable\n");
+	} else {
+		intel_de_write(i915, IPS_CTL, 0);
+		intel_de_posting_read(i915, IPS_CTL);
+	}
+
+	/* We need to wait for a vblank before we can disable the plane. */
+	need_vblank_wait = true;
+
+	return need_vblank_wait;
+}
+
+static bool hsw_ips_need_disable(struct intel_atomic_state *state,
+				 struct intel_crtc *crtc)
+{
+	struct drm_i915_private *i915 = to_i915(state->base.dev);
+	const struct intel_crtc_state *old_crtc_state =
+		intel_atomic_get_old_crtc_state(state, crtc);
+	const struct intel_crtc_state *new_crtc_state =
+		intel_atomic_get_new_crtc_state(state, crtc);
+
+	if (!old_crtc_state->ips_enabled)
+		return false;
+
+	if (intel_crtc_needs_modeset(new_crtc_state))
+		return true;
+
+	/*
+	 * Workaround: Do not read or write the pipe palette/gamma data while
+	 * GAMMA_MODE is configured for split gamma and IPS_CTL has IPS enabled.
+	 *
+	 * Disable IPS before we program the LUT.
+	 */
+	if (IS_HASWELL(i915) &&
+	    (new_crtc_state->uapi.color_mgmt_changed ||
+	     new_crtc_state->update_pipe) &&
+	    new_crtc_state->gamma_mode == GAMMA_MODE_MODE_SPLIT)
+		return true;
+
+	return !new_crtc_state->ips_enabled;
+}
+
+bool hsw_ips_pre_update(struct intel_atomic_state *state,
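+/* Returns true if a vblank wait is needed after disabling IPS. */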
+			struct intel_crtc *crtc)
+{
+	const struct intel_crtc_state *old_crtc_state =
+		intel_atomic_get_old_crtc_state(state, crtc);
+
+	if (!hsw_ips_need_disable(state, crtc))
+		return false;
+
+	return hsw_ips_disable(old_crtc_state);
+}
+
+static bool hsw_ips_need_enable(struct intel_atomic_state *state,
+				struct intel_crtc *crtc)
+{
+	struct drm_i915_private *i915 = to_i915(state->base.dev);
+	const struct intel_crtc_state *old_crtc_state =
+		intel_atomic_get_old_crtc_state(state, crtc);
+	const struct intel_crtc_state *new_crtc_state =
+		intel_atomic_get_new_crtc_state(state, crtc);
+
+	if (!new_crtc_state->ips_enabled)
+		return false;
+
+	if (intel_crtc_needs_modeset(new_crtc_state))
+		return true;
+
+	/*
+	 * Workaround: Do not read or write the pipe palette/gamma data while
+	 * GAMMA_MODE is configured for split gamma and IPS_CTL has IPS enabled.
+	 *
+	 * Re-enable IPS after the LUT has been programmed.
+	 */
+	if (IS_HASWELL(i915) &&
+	    (new_crtc_state->uapi.color_mgmt_changed ||
+	     new_crtc_state->update_pipe) &&
+	    new_crtc_state->gamma_mode == GAMMA_MODE_MODE_SPLIT)
+		return true;
+
+	/*
+	 * We can't read out IPS on broadwell, assume the worst and
+	 * forcibly enable IPS on the first fastset.
+	 */
+	if (new_crtc_state->update_pipe && old_crtc_state->inherited)
+		return true;
+
+	return !old_crtc_state->ips_enabled;
+}
+
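+/* Re-enable IPS after the plane update, if the new state needs it. */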
+void hsw_ips_post_update(struct intel_atomic_state *state,
+			 struct intel_crtc *crtc)
+{
+	const struct intel_crtc_state *new_crtc_state =
+		intel_atomic_get_new_crtc_state(state, crtc);
+
+	if (!hsw_ips_need_enable(state, crtc))
+		return;
+
+	hsw_ips_enable(new_crtc_state);
+}
+
+/* IPS only exists on ULT machines and is tied to pipe A. */
+bool hsw_crtc_supports_ips(struct intel_crtc *crtc)
+{
+	return HAS_IPS(to_i915(crtc->base.dev)) && crtc->pipe == PIPE_A;
+}
+
+bool hsw_crtc_state_ips_capable(const struct intel_crtc_state *crtc_state)
+{
+	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
+	struct drm_i915_private *i915 = to_i915(crtc->base.dev);
+
+	/* IPS only exists on ULT machines and is tied to pipe A. */
+	if (!hsw_crtc_supports_ips(crtc))
+		return false;
+
+	if (!i915->params.enable_ips)
+		return false;
+
+	if (crtc_state->pipe_bpp > 24)
+		return false;
+
+	/*
+	 * We compare against max which means we must take
+	 * the increased cdclk requirement into account when
+	 * calculating the new cdclk.
+	 *
+	 * Should measure whether using a lower cdclk w/o IPS would be better.
+	 */
+	if (IS_BROADWELL(i915) &&
+	    crtc_state->pixel_rate > i915->max_cdclk_freq * 95 / 100)
+		return false;
+
+	return true;
+}
+
+int hsw_ips_compute_config(struct intel_atomic_state *state,
+			   struct intel_crtc *crtc)
+{
+	struct drm_i915_private *i915 = to_i915(state->base.dev);
+	struct intel_crtc_state *crtc_state =
+		intel_atomic_get_new_crtc_state(state, crtc);
+
+	crtc_state->ips_enabled = false;
+
+	if (!hsw_crtc_state_ips_capable(crtc_state))
+		return 0;
+
+	/*
+	 * When IPS gets enabled, the pipe CRC changes. Since IPS gets
+	 * enabled and disabled dynamically based on package C states,
+	 * user space can't make reliable use of the CRCs, so let's just
+	 * completely disable it.
+	 */
+	if (crtc_state->crc_enabled)
+		return 0;
+
+	/* IPS should be fine as long as at least one plane is enabled. */
+	if (!(crtc_state->active_planes & ~BIT(PLANE_CURSOR)))
+		return 0;
+
+	if (IS_BROADWELL(i915)) {
+		const struct intel_cdclk_state *cdclk_state;
+
+		cdclk_state = intel_atomic_get_cdclk_state(state);
+		if (IS_ERR(cdclk_state))
+			return PTR_ERR(cdclk_state);
+
+		/* pixel rate mustn't exceed 95% of cdclk with IPS on BDW */
+		if (crtc_state->pixel_rate > cdclk_state->logical.cdclk * 95 / 100)
+			return 0;
+	}
+
+	crtc_state->ips_enabled = true;
+
+	return 0;
+}
+
+void hsw_ips_get_config(struct intel_crtc_state *crtc_state)
+{
+	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
+	struct drm_i915_private *i915 = to_i915(crtc->base.dev);
+
+	if (!hsw_crtc_supports_ips(crtc))
+		return;
+
+	if (IS_HASWELL(i915)) {
+		crtc_state->ips_enabled = intel_de_read(i915, IPS_CTL) & IPS_ENABLE;
+	} else {
+		/*
+		 * We cannot readout IPS state on broadwell, set to
+		 * true so we can set it to a defined state on first
+		 * commit.
+		 */
+		crtc_state->ips_enabled = true;
+	}
+}
diff --git a/drivers/gpu/drm/i915/display/hsw_ips.h b/drivers/gpu/drm/i915/display/hsw_ips.h
new file mode 100644
index 0000000000000000000000000000000000000000..4564dee497d757cf0e0a38482f03b1694858c85e
--- /dev/null
+++ b/drivers/gpu/drm/i915/display/hsw_ips.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef __HSW_IPS_H__
+#define __HSW_IPS_H__
+
+#include <linux/types.h>
+
+struct intel_atomic_state;
+struct intel_crtc;
+struct intel_crtc_state;
+
+bool hsw_ips_disable(const struct intel_crtc_state *crtc_state);
+bool hsw_ips_pre_update(struct intel_atomic_state *state,
+			struct intel_crtc *crtc);
+void hsw_ips_post_update(struct intel_atomic_state *state,
+			 struct intel_crtc *crtc);
+bool hsw_crtc_supports_ips(struct intel_crtc *crtc);
+bool hsw_crtc_state_ips_capable(const struct intel_crtc_state *crtc_state);
+int hsw_ips_compute_config(struct intel_atomic_state *state,
+			   struct intel_crtc *crtc);
+void hsw_ips_get_config(struct intel_crtc_state *crtc_state);
+
+#endif /* __HSW_IPS_H__ */
diff --git a/drivers/gpu/drm/i915/display/icl_dsi.c b/drivers/gpu/drm/i915/display/icl_dsi.c
index 2d5bb9195b20fdacb793c85fc08226da62df8e7c..13b07c6fd6beb509466a03028af74acc85dfd9e3 100644
--- a/drivers/gpu/drm/i915/display/icl_dsi.c
+++ b/drivers/gpu/drm/i915/display/icl_dsi.c
@@ -29,6 +29,7 @@
 #include <drm/drm_mipi_dsi.h>
 
 #include "icl_dsi.h"
+#include "icl_dsi_regs.h"
 #include "intel_atomic.h"
 #include "intel_backlight.h"
 #include "intel_combo_phy.h"
@@ -570,7 +571,7 @@ gen11_dsi_setup_dphy_timings(struct intel_encoder *encoder,
 	/* Program T-INIT master registers */
 	for_each_dsi_port(port, intel_dsi->ports) {
 		tmp = intel_de_read(dev_priv, ICL_DSI_T_INIT_MASTER(port));
-		tmp &= ~MASTER_INIT_TIMER_MASK;
+		tmp &= ~DSI_T_INIT_MASTER_MASK;
 		tmp |= intel_dsi->init_count;
 		intel_de_write(dev_priv, ICL_DSI_T_INIT_MASTER(port), tmp);
 	}
@@ -788,14 +789,14 @@ gen11_dsi_configure_transcoder(struct intel_encoder *encoder,
 		/* program DSI operation mode */
 		if (is_vid_mode(intel_dsi)) {
 			tmp &= ~OP_MODE_MASK;
-			switch (intel_dsi->video_mode_format) {
+			switch (intel_dsi->video_mode) {
 			default:
-				MISSING_CASE(intel_dsi->video_mode_format);
+				MISSING_CASE(intel_dsi->video_mode);
 				fallthrough;
-			case VIDEO_MODE_NON_BURST_WITH_SYNC_EVENTS:
+			case NON_BURST_SYNC_EVENTS:
 				tmp |= VIDEO_MODE_SYNC_EVENT;
 				break;
-			case VIDEO_MODE_NON_BURST_WITH_SYNC_PULSE:
+			case NON_BURST_SYNC_PULSE:
 				tmp |= VIDEO_MODE_SYNC_PULSE;
 				break;
 			}
@@ -960,8 +961,7 @@ gen11_dsi_set_transcoder_timings(struct intel_encoder *encoder,
 
 	/* TRANS_HSYNC register to be programmed only for video mode */
 	if (is_vid_mode(intel_dsi)) {
-		if (intel_dsi->video_mode_format ==
-		    VIDEO_MODE_NON_BURST_WITH_SYNC_PULSE) {
+		if (intel_dsi->video_mode == NON_BURST_SYNC_PULSE) {
 			/* BSPEC: hsync size should be atleast 16 pixels */
 			if (hsync_size < 16)
 				drm_err(&dev_priv->drm,
diff --git a/drivers/gpu/drm/i915/display/icl_dsi_regs.h b/drivers/gpu/drm/i915/display/icl_dsi_regs.h
new file mode 100644
index 0000000000000000000000000000000000000000..f78f28b8dd94562eaca7b7ab465be3d5cf350b5e
--- /dev/null
+++ b/drivers/gpu/drm/i915/display/icl_dsi_regs.h
@@ -0,0 +1,342 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef __ICL_DSI_REGS_H__
+#define __ICL_DSI_REGS_H__
+
+#include "i915_reg_defs.h"
+
+/* Gen11 DSI */
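+/* DSI transcoder registers are indexed relative to TRANSCODER_DSI_0. */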
+#define _MMIO_DSI(tc, dsi0, dsi1)	_MMIO_TRANS((tc) - TRANSCODER_DSI_0, \
+						    dsi0, dsi1)
+#define _ICL_DSI_ESC_CLK_DIV0		0x6b090
+#define _ICL_DSI_ESC_CLK_DIV1		0x6b890
+#define ICL_DSI_ESC_CLK_DIV(port)	_MMIO_PORT((port),	\
+							_ICL_DSI_ESC_CLK_DIV0, \
+							_ICL_DSI_ESC_CLK_DIV1)
+#define _ICL_DPHY_ESC_CLK_DIV0		0x162190
+#define _ICL_DPHY_ESC_CLK_DIV1		0x6C190
+#define ICL_DPHY_ESC_CLK_DIV(port)	_MMIO_PORT((port),	\
+						_ICL_DPHY_ESC_CLK_DIV0, \
+						_ICL_DPHY_ESC_CLK_DIV1)
+#define  ICL_BYTE_CLK_PER_ESC_CLK_MASK		(0x1f << 16)
+#define  ICL_BYTE_CLK_PER_ESC_CLK_SHIFT	16
+#define  ICL_ESC_CLK_DIV_MASK			0x1ff
+#define  ICL_ESC_CLK_DIV_SHIFT			0
+#define DSI_MAX_ESC_CLK			20000		/* in KHz */
+
+#define _ADL_MIPIO_REG			0x180
+#define ADL_MIPIO_DW(port, dw)		_MMIO(_ICL_COMBOPHY(port) + _ADL_MIPIO_REG + 4 * (dw))
+#define   TX_ESC_CLK_DIV_PHY_SEL	REG_BIT(16)
+#define   TX_ESC_CLK_DIV_PHY_MASK	REG_GENMASK(23, 16)
+#define   TX_ESC_CLK_DIV_PHY		REG_FIELD_PREP(TX_ESC_CLK_DIV_PHY_MASK, 0x7f)
+
+#define _DSI_CMD_FRMCTL_0		0x6b034
+#define _DSI_CMD_FRMCTL_1		0x6b834
+#define DSI_CMD_FRMCTL(port)		_MMIO_PORT(port,	\
+						   _DSI_CMD_FRMCTL_0,\
+						   _DSI_CMD_FRMCTL_1)
+#define   DSI_FRAME_UPDATE_REQUEST		(1 << 31)
+#define   DSI_PERIODIC_FRAME_UPDATE_ENABLE	(1 << 29)
+#define   DSI_NULL_PACKET_ENABLE		(1 << 28)
+#define   DSI_FRAME_IN_PROGRESS			(1 << 0)
+
+#define _DSI_INTR_MASK_REG_0		0x6b070
+#define _DSI_INTR_MASK_REG_1		0x6b870
+#define DSI_INTR_MASK_REG(port)		_MMIO_PORT(port,	\
+						   _DSI_INTR_MASK_REG_0,\
+						   _DSI_INTR_MASK_REG_1)
+
+#define _DSI_INTR_IDENT_REG_0		0x6b074
+#define _DSI_INTR_IDENT_REG_1		0x6b874
+#define DSI_INTR_IDENT_REG(port)	_MMIO_PORT(port,	\
+						   _DSI_INTR_IDENT_REG_0,\
+						   _DSI_INTR_IDENT_REG_1)
+#define   DSI_TE_EVENT				(1 << 31)
+#define   DSI_RX_DATA_OR_BTA_TERMINATED		(1 << 30)
+#define   DSI_TX_DATA				(1 << 29)
+#define   DSI_ULPS_ENTRY_DONE			(1 << 28)
+#define   DSI_NON_TE_TRIGGER_RECEIVED		(1 << 27)
+#define   DSI_HOST_CHKSUM_ERROR			(1 << 26)
+#define   DSI_HOST_MULTI_ECC_ERROR		(1 << 25)
+#define   DSI_HOST_SINGL_ECC_ERROR		(1 << 24)
+#define   DSI_HOST_CONTENTION_DETECTED		(1 << 23)
+#define   DSI_HOST_FALSE_CONTROL_ERROR		(1 << 22)
+#define   DSI_HOST_TIMEOUT_ERROR		(1 << 21)
+#define   DSI_HOST_LOW_POWER_TX_SYNC_ERROR	(1 << 20)
+#define   DSI_HOST_ESCAPE_MODE_ENTRY_ERROR	(1 << 19)
+#define   DSI_FRAME_UPDATE_DONE			(1 << 16)
+#define   DSI_PROTOCOL_VIOLATION_REPORTED	(1 << 15)
+#define   DSI_INVALID_TX_LENGTH			(1 << 13)
+#define   DSI_INVALID_VC			(1 << 12)
+#define   DSI_INVALID_DATA_TYPE			(1 << 11)
+#define   DSI_PERIPHERAL_CHKSUM_ERROR		(1 << 10)
+#define   DSI_PERIPHERAL_MULTI_ECC_ERROR	(1 << 9)
+#define   DSI_PERIPHERAL_SINGLE_ECC_ERROR	(1 << 8)
+#define   DSI_PERIPHERAL_CONTENTION_DETECTED	(1 << 7)
+#define   DSI_PERIPHERAL_FALSE_CTRL_ERROR	(1 << 6)
+#define   DSI_PERIPHERAL_TIMEOUT_ERROR		(1 << 5)
+#define   DSI_PERIPHERAL_LP_TX_SYNC_ERROR	(1 << 4)
+#define   DSI_PERIPHERAL_ESC_MODE_ENTRY_CMD_ERR	(1 << 3)
+#define   DSI_EOT_SYNC_ERROR			(1 << 2)
+#define   DSI_SOT_SYNC_ERROR			(1 << 1)
+#define   DSI_SOT_ERROR				(1 << 0)
+
+/* ICL DSI MODE control */
+#define _ICL_DSI_IO_MODECTL_0				0x6B094
+#define _ICL_DSI_IO_MODECTL_1				0x6B894
+#define ICL_DSI_IO_MODECTL(port)	_MMIO_PORT(port,	\
+						    _ICL_DSI_IO_MODECTL_0, \
+						    _ICL_DSI_IO_MODECTL_1)
+#define  COMBO_PHY_MODE_DSI				(1 << 0)
+
+/* TGL DSI Chicken register */
+#define _TGL_DSI_CHKN_REG_0			0x6B0C0
+#define _TGL_DSI_CHKN_REG_1			0x6B8C0
+#define TGL_DSI_CHKN_REG(port)		_MMIO_PORT(port,	\
+						    _TGL_DSI_CHKN_REG_0, \
+						    _TGL_DSI_CHKN_REG_1)
+#define TGL_DSI_CHKN_LSHS_GB_MASK		REG_GENMASK(15, 12)
+#define TGL_DSI_CHKN_LSHS_GB(byte_clocks)	REG_FIELD_PREP(TGL_DSI_CHKN_LSHS_GB_MASK, \
+							       (byte_clocks))
+#define _ICL_DSI_T_INIT_MASTER_0	0x6b088
+#define _ICL_DSI_T_INIT_MASTER_1	0x6b888
+#define ICL_DSI_T_INIT_MASTER(port)	_MMIO_PORT(port,	\
+						   _ICL_DSI_T_INIT_MASTER_0,\
+						   _ICL_DSI_T_INIT_MASTER_1)
+#define   DSI_T_INIT_MASTER_MASK	REG_GENMASK(15, 0)
+
+#define _DPHY_CLK_TIMING_PARAM_0	0x162180
+#define _DPHY_CLK_TIMING_PARAM_1	0x6c180
+#define DPHY_CLK_TIMING_PARAM(port)	_MMIO_PORT(port,	\
+						   _DPHY_CLK_TIMING_PARAM_0,\
+						   _DPHY_CLK_TIMING_PARAM_1)
+#define _DSI_CLK_TIMING_PARAM_0		0x6b080
+#define _DSI_CLK_TIMING_PARAM_1		0x6b880
+#define DSI_CLK_TIMING_PARAM(port)	_MMIO_PORT(port,	\
+						   _DSI_CLK_TIMING_PARAM_0,\
+						   _DSI_CLK_TIMING_PARAM_1)
+#define  CLK_PREPARE_OVERRIDE		(1 << 31)
+#define  CLK_PREPARE(x)		((x) << 28)
+#define  CLK_PREPARE_MASK		(0x7 << 28)
+#define  CLK_PREPARE_SHIFT		28
+#define  CLK_ZERO_OVERRIDE		(1 << 27)
+#define  CLK_ZERO(x)			((x) << 20)
+#define  CLK_ZERO_MASK			(0xf << 20)
+#define  CLK_ZERO_SHIFT		20
+#define  CLK_PRE_OVERRIDE		(1 << 19)
+#define  CLK_PRE(x)			((x) << 16)
+#define  CLK_PRE_MASK			(0x3 << 16)
+#define  CLK_PRE_SHIFT			16
+#define  CLK_POST_OVERRIDE		(1 << 15)
+#define  CLK_POST(x)			((x) << 8)
+#define  CLK_POST_MASK			(0x7 << 8)
+#define  CLK_POST_SHIFT		8
+#define  CLK_TRAIL_OVERRIDE		(1 << 7)
+#define  CLK_TRAIL(x)			((x) << 0)
+#define  CLK_TRAIL_MASK		(0xf << 0)
+#define  CLK_TRAIL_SHIFT		0
+
+#define _DPHY_DATA_TIMING_PARAM_0	0x162184
+#define _DPHY_DATA_TIMING_PARAM_1	0x6c184
+#define DPHY_DATA_TIMING_PARAM(port)	_MMIO_PORT(port,	\
+						   _DPHY_DATA_TIMING_PARAM_0,\
+						   _DPHY_DATA_TIMING_PARAM_1)
+#define _DSI_DATA_TIMING_PARAM_0	0x6B084
+#define _DSI_DATA_TIMING_PARAM_1	0x6B884
+#define DSI_DATA_TIMING_PARAM(port)	_MMIO_PORT(port,	\
+						   _DSI_DATA_TIMING_PARAM_0,\
+						   _DSI_DATA_TIMING_PARAM_1)
+#define  HS_PREPARE_OVERRIDE		(1 << 31)
+#define  HS_PREPARE(x)			((x) << 24)
+#define  HS_PREPARE_MASK		(0x7 << 24)
+#define  HS_PREPARE_SHIFT		24
+#define  HS_ZERO_OVERRIDE		(1 << 23)
+#define  HS_ZERO(x)			((x) << 16)
+#define  HS_ZERO_MASK			(0xf << 16)
+#define  HS_ZERO_SHIFT			16
+#define  HS_TRAIL_OVERRIDE		(1 << 15)
+#define  HS_TRAIL(x)			((x) << 8)
+#define  HS_TRAIL_MASK			(0x7 << 8)
+#define  HS_TRAIL_SHIFT		8
+#define  HS_EXIT_OVERRIDE		(1 << 7)
+#define  HS_EXIT(x)			((x) << 0)
+#define  HS_EXIT_MASK			(0x7 << 0)
+#define  HS_EXIT_SHIFT			0
+
+#define _DPHY_TA_TIMING_PARAM_0		0x162188
+#define _DPHY_TA_TIMING_PARAM_1		0x6c188
+#define DPHY_TA_TIMING_PARAM(port)	_MMIO_PORT(port,	\
+						   _DPHY_TA_TIMING_PARAM_0,\
+						   _DPHY_TA_TIMING_PARAM_1)
+#define _DSI_TA_TIMING_PARAM_0		0x6b098
+#define _DSI_TA_TIMING_PARAM_1		0x6b898
+#define DSI_TA_TIMING_PARAM(port)	_MMIO_PORT(port,	\
+						   _DSI_TA_TIMING_PARAM_0,\
+						   _DSI_TA_TIMING_PARAM_1)
+#define  TA_SURE_OVERRIDE		(1 << 31)
+#define  TA_SURE(x)			((x) << 16)
+#define  TA_SURE_MASK			(0x1f << 16)
+#define  TA_SURE_SHIFT			16
+#define  TA_GO_OVERRIDE		(1 << 15)
+#define  TA_GO(x)			((x) << 8)
+#define  TA_GO_MASK			(0xf << 8)
+#define  TA_GO_SHIFT			8
+#define  TA_GET_OVERRIDE		(1 << 7)
+#define  TA_GET(x)			((x) << 0)
+#define  TA_GET_MASK			(0xf << 0)
+#define  TA_GET_SHIFT			0
+
+/* DSI transcoder configuration */
+#define _DSI_TRANS_FUNC_CONF_0		0x6b030
+#define _DSI_TRANS_FUNC_CONF_1		0x6b830
+#define DSI_TRANS_FUNC_CONF(tc)		_MMIO_DSI(tc,	\
+						  _DSI_TRANS_FUNC_CONF_0,\
+						  _DSI_TRANS_FUNC_CONF_1)
+#define  OP_MODE_MASK			(0x3 << 28)
+#define  OP_MODE_SHIFT			28
+#define  CMD_MODE_NO_GATE		(0x0 << 28)
+#define  CMD_MODE_TE_GATE		(0x1 << 28)
+#define  VIDEO_MODE_SYNC_EVENT		(0x2 << 28)
+#define  VIDEO_MODE_SYNC_PULSE		(0x3 << 28)
+#define  TE_SOURCE_GPIO			(1 << 27)
+#define  LINK_READY			(1 << 20)
+#define  PIX_FMT_MASK			(0x3 << 16)
+#define  PIX_FMT_SHIFT			16
+#define  PIX_FMT_RGB565			(0x0 << 16)
+#define  PIX_FMT_RGB666_PACKED		(0x1 << 16)
+#define  PIX_FMT_RGB666_LOOSE		(0x2 << 16)
+#define  PIX_FMT_RGB888			(0x3 << 16)
+#define  PIX_FMT_RGB101010		(0x4 << 16)
+#define  PIX_FMT_RGB121212		(0x5 << 16)
+#define  PIX_FMT_COMPRESSED		(0x6 << 16)
+#define  BGR_TRANSMISSION		(1 << 15)
+#define  PIX_VIRT_CHAN(x)		((x) << 12)
+#define  PIX_VIRT_CHAN_MASK		(0x3 << 12)
+#define  PIX_VIRT_CHAN_SHIFT		12
+#define  PIX_BUF_THRESHOLD_MASK		(0x3 << 10)
+#define  PIX_BUF_THRESHOLD_SHIFT	10
+#define  PIX_BUF_THRESHOLD_1_4		(0x0 << 10)
+#define  PIX_BUF_THRESHOLD_1_2		(0x1 << 10)
+#define  PIX_BUF_THRESHOLD_3_4		(0x2 << 10)
+#define  PIX_BUF_THRESHOLD_FULL		(0x3 << 10)
+#define  CONTINUOUS_CLK_MASK		(0x3 << 8)
+#define  CONTINUOUS_CLK_SHIFT		8
+#define  CLK_ENTER_LP_AFTER_DATA	(0x0 << 8)
+#define  CLK_HS_OR_LP			(0x2 << 8)
+#define  CLK_HS_CONTINUOUS		(0x3 << 8)
+#define  LINK_CALIBRATION_MASK		(0x3 << 4)
+#define  LINK_CALIBRATION_SHIFT		4
+#define  CALIBRATION_DISABLED		(0x0 << 4)
+#define  CALIBRATION_ENABLED_INITIAL_ONLY	(0x2 << 4)
+#define  CALIBRATION_ENABLED_INITIAL_PERIODIC	(0x3 << 4)
+#define  BLANKING_PACKET_ENABLE		(1 << 2)
+#define  S3D_ORIENTATION_LANDSCAPE	(1 << 1)
+#define  EOTP_DISABLED			(1 << 0)
+
+#define _DSI_CMD_RXCTL_0		0x6b0d4
+#define _DSI_CMD_RXCTL_1		0x6b8d4
+#define DSI_CMD_RXCTL(tc)		_MMIO_DSI(tc,	\
+						  _DSI_CMD_RXCTL_0,\
+						  _DSI_CMD_RXCTL_1)
+#define  READ_UNLOADS_DW		(1 << 16)
+#define  RECEIVED_UNASSIGNED_TRIGGER	(1 << 15)
+#define  RECEIVED_ACKNOWLEDGE_TRIGGER	(1 << 14)
+#define  RECEIVED_TEAR_EFFECT_TRIGGER	(1 << 13)
+#define  RECEIVED_RESET_TRIGGER		(1 << 12)
+#define  RECEIVED_PAYLOAD_WAS_LOST	(1 << 11)
+#define  RECEIVED_CRC_WAS_LOST		(1 << 10)
+#define  NUMBER_RX_PLOAD_DW_MASK	(0xff << 0)
+#define  NUMBER_RX_PLOAD_DW_SHIFT	0
+
+#define _DSI_CMD_TXCTL_0		0x6b0d0
+#define _DSI_CMD_TXCTL_1		0x6b8d0
+#define DSI_CMD_TXCTL(tc)		_MMIO_DSI(tc,	\
+						  _DSI_CMD_TXCTL_0,\
+						  _DSI_CMD_TXCTL_1)
+#define  KEEP_LINK_IN_HS		(1 << 24)
+#define  FREE_HEADER_CREDIT_MASK	(0x1f << 8)
+#define  FREE_HEADER_CREDIT_SHIFT	0x8
+#define  FREE_PLOAD_CREDIT_MASK		(0xff << 0)
+#define  FREE_PLOAD_CREDIT_SHIFT	0
+#define  MAX_HEADER_CREDIT		0x10
+#define  MAX_PLOAD_CREDIT		0x40
+
+#define _DSI_CMD_TXHDR_0		0x6b100
+#define _DSI_CMD_TXHDR_1		0x6b900
+#define DSI_CMD_TXHDR(tc)		_MMIO_DSI(tc,	\
+						  _DSI_CMD_TXHDR_0,\
+						  _DSI_CMD_TXHDR_1)
+#define  PAYLOAD_PRESENT		(1 << 31)
+#define  LP_DATA_TRANSFER		(1 << 30)
+#define  VBLANK_FENCE			(1 << 29)
+#define  PARAM_WC_MASK			(0xffff << 8)
+#define  PARAM_WC_LOWER_SHIFT		8
+#define  PARAM_WC_UPPER_SHIFT		16
+#define  VC_MASK			(0x3 << 6)
+#define  VC_SHIFT			6
+#define  DT_MASK			(0x3f << 0)
+#define  DT_SHIFT			0
+
+#define _DSI_CMD_TXPYLD_0		0x6b104
+#define _DSI_CMD_TXPYLD_1		0x6b904
+#define DSI_CMD_TXPYLD(tc)		_MMIO_DSI(tc,	\
+						  _DSI_CMD_TXPYLD_0,\
+						  _DSI_CMD_TXPYLD_1)
+
+#define _DSI_LP_MSG_0			0x6b0d8
+#define _DSI_LP_MSG_1			0x6b8d8
+#define DSI_LP_MSG(tc)			_MMIO_DSI(tc,	\
+						  _DSI_LP_MSG_0,\
+						  _DSI_LP_MSG_1)
+#define  LPTX_IN_PROGRESS		(1 << 17)
+#define  LINK_IN_ULPS			(1 << 16)
+#define  LINK_ULPS_TYPE_LP11		(1 << 8)
+#define  LINK_ENTER_ULPS		(1 << 0)
+
+/* DSI timeout registers */
+#define _DSI_HSTX_TO_0			0x6b044
+#define _DSI_HSTX_TO_1			0x6b844
+#define DSI_HSTX_TO(tc)			_MMIO_DSI(tc,	\
+						  _DSI_HSTX_TO_0,\
+						  _DSI_HSTX_TO_1)
+#define  HSTX_TIMEOUT_VALUE_MASK	(0xffff << 16)
+#define  HSTX_TIMEOUT_VALUE_SHIFT	16
+#define  HSTX_TIMEOUT_VALUE(x)		((x) << 16)
+#define  HSTX_TIMED_OUT			(1 << 0)
+
+#define _DSI_LPRX_HOST_TO_0		0x6b048
+#define _DSI_LPRX_HOST_TO_1		0x6b848
+#define DSI_LPRX_HOST_TO(tc)		_MMIO_DSI(tc,	\
+						  _DSI_LPRX_HOST_TO_0,\
+						  _DSI_LPRX_HOST_TO_1)
+#define  LPRX_TIMED_OUT			(1 << 16)
+#define  LPRX_TIMEOUT_VALUE_MASK	(0xffff << 0)
+#define  LPRX_TIMEOUT_VALUE_SHIFT	0
+#define  LPRX_TIMEOUT_VALUE(x)		((x) << 0)
+
+#define _DSI_PWAIT_TO_0			0x6b040
+#define _DSI_PWAIT_TO_1			0x6b840
+#define DSI_PWAIT_TO(tc)		_MMIO_DSI(tc,	\
+						  _DSI_PWAIT_TO_0,\
+						  _DSI_PWAIT_TO_1)
+#define  PRESET_TIMEOUT_VALUE_MASK	(0xffff << 16)
+#define  PRESET_TIMEOUT_VALUE_SHIFT	16
+#define  PRESET_TIMEOUT_VALUE(x)	((x) << 16)
+#define  PRESPONSE_TIMEOUT_VALUE_MASK	(0xffff << 0)
+#define  PRESPONSE_TIMEOUT_VALUE_SHIFT	0
+#define  PRESPONSE_TIMEOUT_VALUE(x)	((x) << 0)
+
+#define _DSI_TA_TO_0			0x6b04c
+#define _DSI_TA_TO_1			0x6b84c
+#define DSI_TA_TO(tc)			_MMIO_DSI(tc,	\
+						  _DSI_TA_TO_0,\
+						  _DSI_TA_TO_1)
+#define  TA_TIMED_OUT			(1 << 16)
+#define  TA_TIMEOUT_VALUE_MASK		(0xffff << 0)
+#define  TA_TIMEOUT_VALUE_SHIFT		0
+#define  TA_TIMEOUT_VALUE(x)		((x) << 0)
+
+#endif /* __ICL_DSI_REGS_H__ */
diff --git a/drivers/gpu/drm/i915/display/intel_atomic.c b/drivers/gpu/drm/i915/display/intel_atomic.c
index 0939040651120e40428dbb0d3391280cf17df41e..e0667d163266bd36e4f0059382cd07d0a5c2949f 100644
--- a/drivers/gpu/drm/i915/display/intel_atomic.c
+++ b/drivers/gpu/drm/i915/display/intel_atomic.c
@@ -281,17 +281,6 @@ void intel_crtc_free_hw_state(struct intel_crtc_state *crtc_state)
 	intel_crtc_put_color_blobs(crtc_state);
 }
 
-void intel_crtc_copy_color_blobs(struct intel_crtc_state *crtc_state,
-				 const struct intel_crtc_state *from_crtc_state)
-{
-	drm_property_replace_blob(&crtc_state->hw.degamma_lut,
-				  from_crtc_state->uapi.degamma_lut);
-	drm_property_replace_blob(&crtc_state->hw.gamma_lut,
-				  from_crtc_state->uapi.gamma_lut);
-	drm_property_replace_blob(&crtc_state->hw.ctm,
-				  from_crtc_state->uapi.ctm);
-}
-
 /**
  * intel_crtc_destroy_state - destroy crtc state
  * @crtc: drm crtc
diff --git a/drivers/gpu/drm/i915/display/intel_atomic.h b/drivers/gpu/drm/i915/display/intel_atomic.h
index d2700c74c9dac0ad181b37b8fa32a3197d073bdc..1dc439983dd943092664f1d63f1ca336f9b5052f 100644
--- a/drivers/gpu/drm/i915/display/intel_atomic.h
+++ b/drivers/gpu/drm/i915/display/intel_atomic.h
@@ -44,8 +44,6 @@ struct drm_crtc_state *intel_crtc_duplicate_state(struct drm_crtc *crtc);
 void intel_crtc_destroy_state(struct drm_crtc *crtc,
 			       struct drm_crtc_state *state);
 void intel_crtc_free_hw_state(struct intel_crtc_state *crtc_state);
-void intel_crtc_copy_color_blobs(struct intel_crtc_state *crtc_state,
-				 const struct intel_crtc_state *from_crtc_state);
 struct drm_atomic_state *intel_atomic_state_alloc(struct drm_device *dev);
 void intel_atomic_state_free(struct drm_atomic_state *state);
 void intel_atomic_state_clear(struct drm_atomic_state *state);
diff --git a/drivers/gpu/drm/i915/display/intel_atomic_plane.c b/drivers/gpu/drm/i915/display/intel_atomic_plane.c
index bec02333bdebaa59aecf1b52ce4f744f2f48b6b0..c53aa6a4c7a0aacb72f8f841654db35b7c1c2d63 100644
--- a/drivers/gpu/drm/i915/display/intel_atomic_plane.c
+++ b/drivers/gpu/drm/i915/display/intel_atomic_plane.c
@@ -45,6 +45,7 @@
 #include "intel_fb_pin.h"
 #include "intel_pm.h"
 #include "intel_sprite.h"
+#include "skl_scaler.h"
 
 static void intel_plane_state_reset(struct intel_plane_state *plane_state,
 				    struct intel_plane *plane)
@@ -322,6 +323,7 @@ void intel_plane_set_invisible(struct intel_crtc_state *crtc_state,
 	struct intel_plane *plane = to_intel_plane(plane_state->uapi.plane);
 
 	crtc_state->active_planes &= ~BIT(plane->id);
+	crtc_state->scaled_planes &= ~BIT(plane->id);
 	crtc_state->nv12_planes &= ~BIT(plane->id);
 	crtc_state->c8_planes &= ~BIT(plane->id);
 	crtc_state->data_rate[plane->id] = 0;
@@ -330,6 +332,185 @@ void intel_plane_set_invisible(struct intel_crtc_state *crtc_state,
 	plane_state->uapi.visible = false;
 }
 
+/* FIXME nuke when all wm code is atomic */
+static bool intel_wm_need_update(const struct intel_plane_state *cur,
+				 struct intel_plane_state *new)
+{
+	/* Update watermarks on tiling or size changes. */
+	if (new->uapi.visible != cur->uapi.visible)
+		return true;
+
+	if (!cur->hw.fb || !new->hw.fb)
+		return false;
+
+	if (cur->hw.fb->modifier != new->hw.fb->modifier ||
+	    cur->hw.rotation != new->hw.rotation ||
+	    drm_rect_width(&new->uapi.src) != drm_rect_width(&cur->uapi.src) ||
+	    drm_rect_height(&new->uapi.src) != drm_rect_height(&cur->uapi.src) ||
+	    drm_rect_width(&new->uapi.dst) != drm_rect_width(&cur->uapi.dst) ||
+	    drm_rect_height(&new->uapi.dst) != drm_rect_height(&cur->uapi.dst))
+		return true;
+
+	return false;
+}
+
+static bool intel_plane_is_scaled(const struct intel_plane_state *plane_state)
+{
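+	/* uapi.src is in 16.16 fixed point; compare integer pixel sizes. */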
+	int src_w = drm_rect_width(&plane_state->uapi.src) >> 16;
+	int src_h = drm_rect_height(&plane_state->uapi.src) >> 16;
+	int dst_w = drm_rect_width(&plane_state->uapi.dst);
+	int dst_h = drm_rect_height(&plane_state->uapi.dst);
+
+	return src_w != dst_w || src_h != dst_h;
+}
+
+static bool intel_plane_do_async_flip(struct intel_plane *plane,
+				      const struct intel_crtc_state *old_crtc_state,
+				      const struct intel_crtc_state *new_crtc_state)
+{
+	struct drm_i915_private *i915 = to_i915(plane->base.dev);
+
+	if (!plane->async_flip)
+		return false;
+
+	if (!new_crtc_state->uapi.async_flip)
+		return false;
+
+	/*
+	 * On display version 13 and newer we might need to override the
+	 * first async flip in order to change the watermark levels as
+	 * part of optimization, so for those we check whether this is
+	 * the first async flip. Platforms earlier than display version
+	 * 13 always do the async flip.
+	 */
+	return DISPLAY_VER(i915) < 13 || old_crtc_state->uapi.async_flip;
+}
+
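+/*
+ * Compute the derived state changes for a plane between the old and new
+ * crtc states: visibility, watermark updates, cxsr/LP WM disables and
+ * async flip eligibility.
+ */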
+static int intel_plane_atomic_calc_changes(const struct intel_crtc_state *old_crtc_state,
+					   struct intel_crtc_state *new_crtc_state,
+					   const struct intel_plane_state *old_plane_state,
+					   struct intel_plane_state *new_plane_state)
+{
+	struct intel_crtc *crtc = to_intel_crtc(new_crtc_state->uapi.crtc);
+	struct intel_plane *plane = to_intel_plane(new_plane_state->uapi.plane);
+	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
+	bool mode_changed = intel_crtc_needs_modeset(new_crtc_state);
+	bool was_crtc_enabled = old_crtc_state->hw.active;
+	bool is_crtc_enabled = new_crtc_state->hw.active;
+	bool turn_off, turn_on, visible, was_visible;
+	int ret;
+
+	if (DISPLAY_VER(dev_priv) >= 9 && plane->id != PLANE_CURSOR) {
+		ret = skl_update_scaler_plane(new_crtc_state, new_plane_state);
+		if (ret)
+			return ret;
+	}
+
+	was_visible = old_plane_state->uapi.visible;
+	visible = new_plane_state->uapi.visible;
+
+	if (!was_crtc_enabled && drm_WARN_ON(&dev_priv->drm, was_visible))
+		was_visible = false;
+
+	/*
+	 * Visibility is calculated as if the crtc was on, but
+	 * after scaler setup everything depends on it being off
+	 * when the crtc isn't active.
+	 *
+	 * FIXME this is wrong for watermarks. Watermarks should also
+	 * be computed as if the pipe would be active. Perhaps move
+	 * per-plane wm computation to the .check_plane() hook, and
+	 * only combine the results from all planes in the current place?
+	 */
+	if (!is_crtc_enabled) {
+		intel_plane_set_invisible(new_crtc_state, new_plane_state);
+		visible = false;
+	}
+
+	if (!was_visible && !visible)
+		return 0;
+
+	turn_off = was_visible && (!visible || mode_changed);
+	turn_on = visible && (!was_visible || mode_changed);
+
+	drm_dbg_atomic(&dev_priv->drm,
+		       "[CRTC:%d:%s] with [PLANE:%d:%s] visible %i -> %i, off %i, on %i, ms %i\n",
+		       crtc->base.base.id, crtc->base.name,
+		       plane->base.base.id, plane->base.name,
+		       was_visible, visible,
+		       turn_off, turn_on, mode_changed);
+
+	if (turn_on) {
+		if (DISPLAY_VER(dev_priv) < 5 && !IS_G4X(dev_priv))
+			new_crtc_state->update_wm_pre = true;
+
+		/* must disable cxsr around plane enable/disable */
+		if (plane->id != PLANE_CURSOR)
+			new_crtc_state->disable_cxsr = true;
+	} else if (turn_off) {
+		if (DISPLAY_VER(dev_priv) < 5 && !IS_G4X(dev_priv))
+			new_crtc_state->update_wm_post = true;
+
+		/* must disable cxsr around plane enable/disable */
+		if (plane->id != PLANE_CURSOR)
+			new_crtc_state->disable_cxsr = true;
+	} else if (intel_wm_need_update(old_plane_state, new_plane_state)) {
+		if (DISPLAY_VER(dev_priv) < 5 && !IS_G4X(dev_priv)) {
+			/* FIXME bollocks */
+			new_crtc_state->update_wm_pre = true;
+			new_crtc_state->update_wm_post = true;
+		}
+	}
+
+	if (visible || was_visible)
+		new_crtc_state->fb_bits |= plane->frontbuffer_bit;
+
+	/*
+	 * ILK/SNB DVSACNTR/Sprite Enable
+	 * IVB SPR_CTL/Sprite Enable
+	 * "When in Self Refresh Big FIFO mode, a write to enable the
+	 *  plane will be internally buffered and delayed while Big FIFO
+	 *  mode is exiting."
+	 *
+	 * Which means that enabling the sprite can take an extra frame
+	 * when we start in big FIFO mode (LP1+). Thus we need to drop
+	 * down to LP0 and wait for vblank in order to make sure the
+	 * sprite gets enabled on the next vblank after the register write.
+	 * Doing otherwise would risk enabling the sprite one frame after
+	 * we've already signalled flip completion. We can resume LP1+
+	 * once the sprite has been enabled.
+	 *
+	 *
+	 * WaCxSRDisabledForSpriteScaling:ivb
+	 * IVB SPR_SCALE/Scaling Enable
+	 * "Low Power watermarks must be disabled for at least one
+	 *  frame before enabling sprite scaling, and kept disabled
+	 *  until sprite scaling is disabled."
+	 *
+	 * ILK/SNB DVSASCALE/Scaling Enable
+	 * "When in Self Refresh Big FIFO mode, scaling enable will be
+	 *  masked off while Big FIFO mode is exiting."
+	 *
+	 * Despite the w/a only being listed for IVB we assume that
+	 * the ILK/SNB note has similar ramifications, hence we apply
+	 * the w/a on all three platforms.
+	 *
+	 * Experimental results suggest this is needed also for the primary
+	 * plane, not only the sprite plane.
+	 */
+	if (plane->id != PLANE_CURSOR &&
+	    (IS_IRONLAKE(dev_priv) || IS_SANDYBRIDGE(dev_priv) ||
+	     IS_IVYBRIDGE(dev_priv)) &&
+	    (turn_on || (!intel_plane_is_scaled(old_plane_state) &&
+			 intel_plane_is_scaled(new_plane_state))))
+		new_crtc_state->disable_lp_wm = true;
+
+	if (intel_plane_do_async_flip(plane, old_crtc_state, new_crtc_state))
+		new_plane_state->do_async_flip = true;
+
+	return 0;
+}
+
 int intel_plane_atomic_check_with_state(const struct intel_crtc_state *old_crtc_state,
 					struct intel_crtc_state *new_crtc_state,
 					const struct intel_plane_state *old_plane_state,
@@ -356,6 +537,10 @@ int intel_plane_atomic_check_with_state(const struct intel_crtc_state *old_crtc_
 	if (new_plane_state->uapi.visible)
 		new_crtc_state->active_planes |= BIT(plane->id);
 
+	if (new_plane_state->uapi.visible &&
+	    intel_plane_is_scaled(new_plane_state))
+		new_crtc_state->scaled_planes |= BIT(plane->id);
+
 	if (new_plane_state->uapi.visible &&
 	    intel_format_info_is_yuv_semiplanar(fb->format, fb->modifier))
 		new_crtc_state->nv12_planes |= BIT(plane->id);
@@ -403,10 +588,11 @@ int intel_plane_atomic_check(struct intel_atomic_state *state,
 	struct intel_crtc_state *new_crtc_state =
 		intel_atomic_get_new_crtc_state(state, crtc);
 
-	if (new_crtc_state && new_crtc_state->bigjoiner_slave) {
+	if (new_crtc_state && intel_crtc_is_bigjoiner_slave(new_crtc_state)) {
+		struct intel_crtc *master_crtc =
+			intel_master_crtc(new_crtc_state);
 		struct intel_plane *master_plane =
-			intel_crtc_get_plane(new_crtc_state->bigjoiner_linked_crtc,
-					     plane->id);
+			intel_crtc_get_plane(master_crtc, plane->id);
 
 		new_master_plane_state =
 			intel_atomic_get_new_plane_state(state, master_plane);
@@ -507,8 +693,8 @@ void intel_plane_disable_arm(struct intel_plane *plane,
 	plane->disable_arm(plane, crtc_state);
 }
 
-void intel_update_planes_on_crtc(struct intel_atomic_state *state,
-				 struct intel_crtc *crtc)
+void intel_crtc_planes_update_noarm(struct intel_atomic_state *state,
+				    struct intel_crtc *crtc)
 {
 	struct intel_crtc_state *new_crtc_state =
 		intel_atomic_get_new_crtc_state(state, crtc);
@@ -536,8 +722,8 @@ void intel_update_planes_on_crtc(struct intel_atomic_state *state,
 	}
 }
 
-void skl_arm_planes_on_crtc(struct intel_atomic_state *state,
-			    struct intel_crtc *crtc)
+static void skl_crtc_planes_update_arm(struct intel_atomic_state *state,
+				       struct intel_crtc *crtc)
 {
 	struct intel_crtc_state *old_crtc_state =
 		intel_atomic_get_old_crtc_state(state, crtc);
@@ -571,8 +757,8 @@ void skl_arm_planes_on_crtc(struct intel_atomic_state *state,
 	}
 }
 
-void i9xx_arm_planes_on_crtc(struct intel_atomic_state *state,
-			     struct intel_crtc *crtc)
+static void i9xx_crtc_planes_update_arm(struct intel_atomic_state *state,
+					struct intel_crtc *crtc)
 {
 	struct intel_crtc_state *new_crtc_state =
 		intel_atomic_get_new_crtc_state(state, crtc);
@@ -597,6 +783,17 @@ void i9xx_arm_planes_on_crtc(struct intel_atomic_state *state,
 	}
 }
 
+void intel_crtc_planes_update_arm(struct intel_atomic_state *state,
+				  struct intel_crtc *crtc)
+{
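+	/* skl+ and pre-skl have different plane register arming sequences */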
+	struct drm_i915_private *i915 = to_i915(state->base.dev);
+
+	if (DISPLAY_VER(i915) >= 9)
+		skl_crtc_planes_update_arm(state, crtc);
+	else
+		i9xx_crtc_planes_update_arm(state, crtc);
+}
+
 int intel_atomic_plane_check_clipping(struct intel_plane_state *plane_state,
 				      struct intel_crtc_state *crtc_state,
 				      int min_scale, int max_scale,
@@ -633,7 +830,7 @@ int intel_atomic_plane_check_clipping(struct intel_plane_state *plane_state,
 	}
 
 	/* right side of the image is on the slave crtc, adjust dst to match */
-	if (crtc_state->bigjoiner_slave)
+	if (intel_crtc_is_bigjoiner_slave(crtc_state))
 		drm_rect_translate(dst, -crtc_state->pipe_src_w, 0);
 
 	/*
diff --git a/drivers/gpu/drm/i915/display/intel_atomic_plane.h b/drivers/gpu/drm/i915/display/intel_atomic_plane.h
index ead789709477ff38d83d3022c3686a65ba71c628..f4763a53541e2371c3d4791f2a93577747e6e659 100644
--- a/drivers/gpu/drm/i915/display/intel_atomic_plane.h
+++ b/drivers/gpu/drm/i915/display/intel_atomic_plane.h
@@ -44,22 +44,16 @@ void intel_plane_free(struct intel_plane *plane);
 struct drm_plane_state *intel_plane_duplicate_state(struct drm_plane *plane);
 void intel_plane_destroy_state(struct drm_plane *plane,
 			       struct drm_plane_state *state);
-void intel_update_planes_on_crtc(struct intel_atomic_state *state,
-				 struct intel_crtc *crtc);
-void skl_arm_planes_on_crtc(struct intel_atomic_state *state,
-			    struct intel_crtc *crtc);
-void i9xx_arm_planes_on_crtc(struct intel_atomic_state *state,
-			     struct intel_crtc *crtc);
+void intel_crtc_planes_update_noarm(struct intel_atomic_state *state,
+				    struct intel_crtc *crtc);
+void intel_crtc_planes_update_arm(struct intel_atomic_state *state,
+				  struct intel_crtc *crtc);
 int intel_plane_atomic_check_with_state(const struct intel_crtc_state *old_crtc_state,
 					struct intel_crtc_state *crtc_state,
 					const struct intel_plane_state *old_plane_state,
 					struct intel_plane_state *intel_state);
 int intel_plane_atomic_check(struct intel_atomic_state *state,
 			     struct intel_plane *plane);
-int intel_plane_atomic_calc_changes(const struct intel_crtc_state *old_crtc_state,
-				    struct intel_crtc_state *crtc_state,
-				    const struct intel_plane_state *old_plane_state,
-				    struct intel_plane_state *plane_state);
 int intel_plane_calc_min_cdclk(struct intel_atomic_state *state,
 			       struct intel_plane *plane,
 			       bool *need_cdclk_calc);
diff --git a/drivers/gpu/drm/i915/display/intel_bios.c b/drivers/gpu/drm/i915/display/intel_bios.c
index aec0efd5350ef93017fa75629e950695cec515ac..40b5e7ed12c20972beaf653a4bff20632e1d67e9 100644
--- a/drivers/gpu/drm/i915/display/intel_bios.c
+++ b/drivers/gpu/drm/i915/display/intel_bios.c
@@ -596,6 +596,12 @@ parse_general_features(struct drm_i915_private *i915,
 	} else {
 		i915->vbt.orientation = DRM_MODE_PANEL_ORIENTATION_UNKNOWN;
 	}
+
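+	/* BDB 249+ allows the VBT to override the AFC start-up value. */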
+	if (bdb->version >= 249 && general->afc_startup_config) {
+		i915->vbt.override_afc_startup = true;
+		i915->vbt.override_afc_startup_val = general->afc_startup_config == 0x1 ? 0x0 : 0x7;
+	}
+
 	drm_dbg_kms(&i915->drm,
 		    "BDB_GENERAL_FEATURES int_tv_support %d int_crt_support %d lvds_use_ssc %d lvds_ssc_freq %d display_clock_mode %d fdi_rx_polarity_inverted %d\n",
 		    i915->vbt.int_tv_support,
diff --git a/drivers/gpu/drm/i915/display/intel_bw.c b/drivers/gpu/drm/i915/display/intel_bw.c
index 5dce3cf0ed124dbec1defa2b125d1a0447df5c82..ad1564ca7269cd00bd0ee423ddfa9ea2c8fd94ef 100644
--- a/drivers/gpu/drm/i915/display/intel_bw.c
+++ b/drivers/gpu/drm/i915/display/intel_bw.c
@@ -10,6 +10,7 @@
 #include "intel_bw.h"
 #include "intel_cdclk.h"
 #include "intel_display_types.h"
+#include "intel_mchbar_regs.h"
 #include "intel_pcode.h"
 #include "intel_pm.h"
 
@@ -673,6 +674,49 @@ intel_atomic_get_bw_state(struct intel_atomic_state *state)
 	return to_intel_bw_state(bw_state);
 }
 
+static void skl_crtc_calc_dbuf_bw(struct intel_bw_state *bw_state,
+				  const struct intel_crtc_state *crtc_state)
+{
+	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
+	struct drm_i915_private *i915 = to_i915(crtc->base.dev);
+	struct intel_dbuf_bw *crtc_bw = &bw_state->dbuf_bw[crtc->pipe];
+	enum plane_id plane_id;
+
+	memset(&crtc_bw->used_bw, 0, sizeof(crtc_bw->used_bw));
+
+	if (!crtc_state->hw.active)
+		return;
+
+	for_each_plane_id_on_crtc(crtc, plane_id) {
+		const struct skl_ddb_entry *ddb_y =
+			&crtc_state->wm.skl.plane_ddb_y[plane_id];
+		const struct skl_ddb_entry *ddb_uv =
+			&crtc_state->wm.skl.plane_ddb_uv[plane_id];
+		unsigned int data_rate = crtc_state->data_rate[plane_id];
+		unsigned int dbuf_mask = 0;
+		enum dbuf_slice slice;
+
+		dbuf_mask |= skl_ddb_dbuf_slice_mask(i915, ddb_y);
+		dbuf_mask |= skl_ddb_dbuf_slice_mask(i915, ddb_uv);
+
+		/*
+		 * FIXME: To calculate this more properly we probably
+		 * need to split the per-plane data_rate into data_rate_y
+		 * and data_rate_uv for multiplanar formats, so that the
+		 * rate doesn't get accounted twice if the Y and UV planes
+		 * happen to reside on different slices.
+		 * However, for pre-icl this works anyway because we have
+		 * only a single slice, and for icl+ the uv plane has a
+		 * non-zero data rate.
+		 * So in the worst case these calculations are a bit
+		 * pessimistic, which shouldn't pose any significant
+		 * problem anyway.
+		 */
+		for_each_dbuf_slice_in_mask(i915, slice, dbuf_mask)
+			crtc_bw->used_bw[slice] += data_rate;
+	}
+}
+
 int skl_bw_calc_min_cdclk(struct intel_atomic_state *state)
 {
 	struct drm_i915_private *dev_priv = to_i915(state->base.dev);
@@ -685,50 +729,13 @@ int skl_bw_calc_min_cdclk(struct intel_atomic_state *state)
 	int i;
 
 	for_each_new_intel_crtc_in_state(state, crtc, crtc_state, i) {
-		enum plane_id plane_id;
-		struct intel_dbuf_bw *crtc_bw;
-
 		new_bw_state = intel_atomic_get_bw_state(state);
 		if (IS_ERR(new_bw_state))
 			return PTR_ERR(new_bw_state);
 
 		old_bw_state = intel_atomic_get_old_bw_state(state);
 
-		crtc_bw = &new_bw_state->dbuf_bw[crtc->pipe];
-
-		memset(&crtc_bw->used_bw, 0, sizeof(crtc_bw->used_bw));
-
-		if (!crtc_state->hw.active)
-			continue;
-
-		for_each_plane_id_on_crtc(crtc, plane_id) {
-			const struct skl_ddb_entry *plane_alloc =
-				&crtc_state->wm.skl.plane_ddb_y[plane_id];
-			const struct skl_ddb_entry *uv_plane_alloc =
-				&crtc_state->wm.skl.plane_ddb_uv[plane_id];
-			unsigned int data_rate = crtc_state->data_rate[plane_id];
-			unsigned int dbuf_mask = 0;
-			enum dbuf_slice slice;
-
-			dbuf_mask |= skl_ddb_dbuf_slice_mask(dev_priv, plane_alloc);
-			dbuf_mask |= skl_ddb_dbuf_slice_mask(dev_priv, uv_plane_alloc);
-
-			/*
-			 * FIXME: To calculate that more properly we probably
-			 * need to to split per plane data_rate into data_rate_y
-			 * and data_rate_uv for multiplanar formats in order not
-			 * to get accounted those twice if they happen to reside
-			 * on different slices.
-			 * However for pre-icl this would work anyway because
-			 * we have only single slice and for icl+ uv plane has
-			 * non-zero data rate.
-			 * So in worst case those calculation are a bit
-			 * pessimistic, which shouldn't pose any significant
-			 * problem anyway.
-			 */
-			for_each_dbuf_slice_in_mask(dev_priv, slice, dbuf_mask)
-				crtc_bw->used_bw[slice] += data_rate;
-		}
+		skl_crtc_calc_dbuf_bw(new_bw_state, crtc_state);
 	}
 
 	if (!old_bw_state)
@@ -809,25 +816,11 @@ int intel_bw_calc_min_cdclk(struct intel_atomic_state *state)
 	return 0;
 }
 
-int intel_bw_atomic_check(struct intel_atomic_state *state)
+static u16 icl_qgv_points_mask(struct drm_i915_private *i915)
 {
-	struct drm_i915_private *dev_priv = to_i915(state->base.dev);
-	struct intel_crtc_state *new_crtc_state, *old_crtc_state;
-	struct intel_bw_state *new_bw_state = NULL;
-	const struct intel_bw_state *old_bw_state = NULL;
-	unsigned int data_rate;
-	unsigned int num_active_planes;
-	struct intel_crtc *crtc;
-	int i, ret;
-	u32 allowed_points = 0;
-	unsigned int max_bw_point = 0, max_bw = 0;
-	unsigned int num_qgv_points = dev_priv->max_bw[0].num_qgv_points;
-	unsigned int num_psf_gv_points = dev_priv->max_bw[0].num_psf_gv_points;
-	u32 mask = 0;
-
-	/* FIXME earlier gens need some checks too */
-	if (DISPLAY_VER(dev_priv) < 11)
-		return 0;
+	unsigned int num_psf_gv_points = i915->max_bw[0].num_psf_gv_points;
+	unsigned int num_qgv_points = i915->max_bw[0].num_qgv_points;
+	u16 mask = 0;
 
 	/*
 	 * We can _not_ use the whole ADLS_QGV_PT_MASK here, as PCode rejects
@@ -840,6 +833,16 @@ int intel_bw_atomic_check(struct intel_atomic_state *state)
 	if (num_psf_gv_points > 0)
 		mask |= REG_GENMASK(num_psf_gv_points - 1, 0) << ADLS_PSF_PT_SHIFT;
 
+	return mask;
+}
+
+static int intel_bw_check_data_rate(struct intel_atomic_state *state, bool *changed)
+{
+	struct drm_i915_private *i915 = to_i915(state->base.dev);
+	const struct intel_crtc_state *new_crtc_state, *old_crtc_state;
+	struct intel_crtc *crtc;
+	int i;
+
 	for_each_oldnew_intel_crtc_in_state(state, crtc, old_crtc_state,
 					    new_crtc_state, i) {
 		unsigned int old_data_rate =
@@ -850,6 +853,7 @@ int intel_bw_atomic_check(struct intel_atomic_state *state)
 			intel_bw_crtc_num_active_planes(old_crtc_state);
 		unsigned int new_active_planes =
 			intel_bw_crtc_num_active_planes(new_crtc_state);
+		struct intel_bw_state *new_bw_state;
 
 		/*
 		 * Avoid locking the bw state when
@@ -866,14 +870,53 @@ int intel_bw_atomic_check(struct intel_atomic_state *state)
 		new_bw_state->data_rate[crtc->pipe] = new_data_rate;
 		new_bw_state->num_active_planes[crtc->pipe] = new_active_planes;
 
-		drm_dbg_kms(&dev_priv->drm,
-			    "pipe %c data rate %u num active planes %u\n",
-			    pipe_name(crtc->pipe),
+		*changed = true;
+
+		drm_dbg_kms(&i915->drm,
+			    "[CRTC:%d:%s] data rate %u num active planes %u\n",
+			    crtc->base.base.id, crtc->base.name,
 			    new_bw_state->data_rate[crtc->pipe],
 			    new_bw_state->num_active_planes[crtc->pipe]);
 	}
 
-	if (!new_bw_state)
+	return 0;
+}
+
+int intel_bw_atomic_check(struct intel_atomic_state *state)
+{
+	struct drm_i915_private *dev_priv = to_i915(state->base.dev);
+	const struct intel_bw_state *old_bw_state;
+	struct intel_bw_state *new_bw_state;
+	unsigned int data_rate;
+	unsigned int num_active_planes;
+	int i, ret;
+	u32 allowed_points = 0;
+	unsigned int max_bw_point = 0, max_bw = 0;
+	unsigned int num_qgv_points = dev_priv->max_bw[0].num_qgv_points;
+	unsigned int num_psf_gv_points = dev_priv->max_bw[0].num_psf_gv_points;
+	bool changed = false;
+
+	/* FIXME earlier gens need some checks too */
+	if (DISPLAY_VER(dev_priv) < 11)
+		return 0;
+
+	ret = intel_bw_check_data_rate(state, &changed);
+	if (ret)
+		return ret;
+
+	old_bw_state = intel_atomic_get_old_bw_state(state);
+	new_bw_state = intel_atomic_get_new_bw_state(state);
+
+	if (new_bw_state &&
+	    intel_can_enable_sagv(dev_priv, old_bw_state) !=
+	    intel_can_enable_sagv(dev_priv, new_bw_state))
+		changed = true;
+
+	/*
+	 * If none of our inputs (data rates, number of active
+	 * planes, SAGV yes/no) changed then nothing to do here.
+	 */
+	if (!changed)
 		return 0;
 
 	ret = intel_atomic_lock_global_state(&new_bw_state->base);
@@ -957,9 +1000,9 @@ int intel_bw_atomic_check(struct intel_atomic_state *state)
 	 * We store the ones which need to be masked as that is what PCode
 	 * actually accepts as a parameter.
 	 */
-	new_bw_state->qgv_points_mask = ~allowed_points & mask;
+	new_bw_state->qgv_points_mask = ~allowed_points &
+		icl_qgv_points_mask(dev_priv);
 
-	old_bw_state = intel_atomic_get_old_bw_state(state);
 	/*
 	 * If the actual mask had changed we need to make sure that
 	 * the commits are serialized(in case this is a nomodeset, nonblocking)
diff --git a/drivers/gpu/drm/i915/display/intel_bw.h b/drivers/gpu/drm/i915/display/intel_bw.h
index 46c6eecbd9175ba51eaa90041f97bc2a3dc63280..0ceaed1c96562adfa97b239bd8ede37915f2e716 100644
--- a/drivers/gpu/drm/i915/display/intel_bw.h
+++ b/drivers/gpu/drm/i915/display/intel_bw.h
@@ -30,19 +30,19 @@ struct intel_bw_state {
 	 */
 	u8 pipe_sagv_reject;
 
+	/* bitmask of active pipes */
+	u8 active_pipes;
+
 	/*
 	 * Current QGV points mask, which restricts
 	 * some particular SAGV states, not to confuse
 	 * with pipe_sagv_mask.
 	 */
-	u8 qgv_points_mask;
+	u16 qgv_points_mask;
 
 	unsigned int data_rate[I915_MAX_PIPES];
 	u8 num_active_planes[I915_MAX_PIPES];
 
-	/* bitmask of active pipes */
-	u8 active_pipes;
-
 	int min_cdclk;
 };
 
diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.c b/drivers/gpu/drm/i915/display/intel_cdclk.c
index 4b140a014ca8ddc05f29d893ada44a55d5508f55..8888fda8b7016a63cf6d149cfb45d8f302be8b58 100644
--- a/drivers/gpu/drm/i915/display/intel_cdclk.c
+++ b/drivers/gpu/drm/i915/display/intel_cdclk.c
@@ -23,6 +23,7 @@
 
 #include <linux/time.h>
 
+#include "hsw_ips.h"
 #include "intel_atomic.h"
 #include "intel_atomic_plane.h"
 #include "intel_audio.h"
@@ -31,6 +32,7 @@
 #include "intel_crtc.h"
 #include "intel_de.h"
 #include "intel_display_types.h"
+#include "intel_mchbar_regs.h"
 #include "intel_pci_config.h"
 #include "intel_pcode.h"
 #include "intel_psr.h"
diff --git a/drivers/gpu/drm/i915/display/intel_color.c b/drivers/gpu/drm/i915/display/intel_color.c
index de3ded1e327a33527c03b3fd0db35bbab977b30a..e94ec57260f1709efd42e0a15cfcb7918f49bdcb 100644
--- a/drivers/gpu/drm/i915/display/intel_color.c
+++ b/drivers/gpu/drm/i915/display/intel_color.c
@@ -28,6 +28,25 @@
 #include "intel_dpll.h"
 #include "vlv_dsi_pll.h"
 
+struct intel_color_funcs {
+	int (*color_check)(struct intel_crtc_state *crtc_state);
+	/*
+	 * Program double buffered color management registers during
+	 * vblank evasion. The registers should then latch during the
+	 * next vblank start, alongside any other double buffered registers
+	 * involved with the same commit.
+	 */
+	void (*color_commit)(const struct intel_crtc_state *crtc_state);
+	/*
+	 * Load LUTs (and other single buffered color management
+	 * registers). Will (hopefully) be called during the vblank
+	 * following the latching of any double buffered registers
+	 * involved with the same commit.
+	 */
+	void (*load_luts)(const struct intel_crtc_state *crtc_state);
+	void (*read_luts)(struct intel_crtc_state *crtc_state);
+};
+
 #define CTM_COEFF_SIGN	(1ULL << 63)
 
 #define CTM_COEFF_1_0	(1ULL << 32)
@@ -160,29 +179,29 @@ static void ilk_update_pipe_csc(struct intel_crtc *crtc,
 	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
 	enum pipe pipe = crtc->pipe;
 
-	intel_de_write(dev_priv, PIPE_CSC_PREOFF_HI(pipe), preoff[0]);
-	intel_de_write(dev_priv, PIPE_CSC_PREOFF_ME(pipe), preoff[1]);
-	intel_de_write(dev_priv, PIPE_CSC_PREOFF_LO(pipe), preoff[2]);
+	intel_de_write_fw(dev_priv, PIPE_CSC_PREOFF_HI(pipe), preoff[0]);
+	intel_de_write_fw(dev_priv, PIPE_CSC_PREOFF_ME(pipe), preoff[1]);
+	intel_de_write_fw(dev_priv, PIPE_CSC_PREOFF_LO(pipe), preoff[2]);
 
-	intel_de_write(dev_priv, PIPE_CSC_COEFF_RY_GY(pipe),
-		       coeff[0] << 16 | coeff[1]);
-	intel_de_write(dev_priv, PIPE_CSC_COEFF_BY(pipe), coeff[2] << 16);
+	intel_de_write_fw(dev_priv, PIPE_CSC_COEFF_RY_GY(pipe),
+			  coeff[0] << 16 | coeff[1]);
+	intel_de_write_fw(dev_priv, PIPE_CSC_COEFF_BY(pipe), coeff[2] << 16);
 
-	intel_de_write(dev_priv, PIPE_CSC_COEFF_RU_GU(pipe),
-		       coeff[3] << 16 | coeff[4]);
-	intel_de_write(dev_priv, PIPE_CSC_COEFF_BU(pipe), coeff[5] << 16);
+	intel_de_write_fw(dev_priv, PIPE_CSC_COEFF_RU_GU(pipe),
+			  coeff[3] << 16 | coeff[4]);
+	intel_de_write_fw(dev_priv, PIPE_CSC_COEFF_BU(pipe), coeff[5] << 16);
 
-	intel_de_write(dev_priv, PIPE_CSC_COEFF_RV_GV(pipe),
-		       coeff[6] << 16 | coeff[7]);
-	intel_de_write(dev_priv, PIPE_CSC_COEFF_BV(pipe), coeff[8] << 16);
+	intel_de_write_fw(dev_priv, PIPE_CSC_COEFF_RV_GV(pipe),
+			  coeff[6] << 16 | coeff[7]);
+	intel_de_write_fw(dev_priv, PIPE_CSC_COEFF_BV(pipe), coeff[8] << 16);
 
 	if (DISPLAY_VER(dev_priv) >= 7) {
-		intel_de_write(dev_priv, PIPE_CSC_POSTOFF_HI(pipe),
-			       postoff[0]);
-		intel_de_write(dev_priv, PIPE_CSC_POSTOFF_ME(pipe),
-			       postoff[1]);
-		intel_de_write(dev_priv, PIPE_CSC_POSTOFF_LO(pipe),
-			       postoff[2]);
+		intel_de_write_fw(dev_priv, PIPE_CSC_POSTOFF_HI(pipe),
+				  postoff[0]);
+		intel_de_write_fw(dev_priv, PIPE_CSC_POSTOFF_ME(pipe),
+				  postoff[1]);
+		intel_de_write_fw(dev_priv, PIPE_CSC_POSTOFF_LO(pipe),
+				  postoff[2]);
 	}
 }
 
@@ -194,28 +213,28 @@ static void icl_update_output_csc(struct intel_crtc *crtc,
 	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
 	enum pipe pipe = crtc->pipe;
 
-	intel_de_write(dev_priv, PIPE_CSC_OUTPUT_PREOFF_HI(pipe), preoff[0]);
-	intel_de_write(dev_priv, PIPE_CSC_OUTPUT_PREOFF_ME(pipe), preoff[1]);
-	intel_de_write(dev_priv, PIPE_CSC_OUTPUT_PREOFF_LO(pipe), preoff[2]);
+	intel_de_write_fw(dev_priv, PIPE_CSC_OUTPUT_PREOFF_HI(pipe), preoff[0]);
+	intel_de_write_fw(dev_priv, PIPE_CSC_OUTPUT_PREOFF_ME(pipe), preoff[1]);
+	intel_de_write_fw(dev_priv, PIPE_CSC_OUTPUT_PREOFF_LO(pipe), preoff[2]);
 
-	intel_de_write(dev_priv, PIPE_CSC_OUTPUT_COEFF_RY_GY(pipe),
-		       coeff[0] << 16 | coeff[1]);
-	intel_de_write(dev_priv, PIPE_CSC_OUTPUT_COEFF_BY(pipe),
-		       coeff[2] << 16);
+	intel_de_write_fw(dev_priv, PIPE_CSC_OUTPUT_COEFF_RY_GY(pipe),
+			  coeff[0] << 16 | coeff[1]);
+	intel_de_write_fw(dev_priv, PIPE_CSC_OUTPUT_COEFF_BY(pipe),
+			  coeff[2] << 16);
 
-	intel_de_write(dev_priv, PIPE_CSC_OUTPUT_COEFF_RU_GU(pipe),
-		       coeff[3] << 16 | coeff[4]);
-	intel_de_write(dev_priv, PIPE_CSC_OUTPUT_COEFF_BU(pipe),
-		       coeff[5] << 16);
+	intel_de_write_fw(dev_priv, PIPE_CSC_OUTPUT_COEFF_RU_GU(pipe),
+			  coeff[3] << 16 | coeff[4]);
+	intel_de_write_fw(dev_priv, PIPE_CSC_OUTPUT_COEFF_BU(pipe),
+			  coeff[5] << 16);
 
-	intel_de_write(dev_priv, PIPE_CSC_OUTPUT_COEFF_RV_GV(pipe),
-		       coeff[6] << 16 | coeff[7]);
-	intel_de_write(dev_priv, PIPE_CSC_OUTPUT_COEFF_BV(pipe),
-		       coeff[8] << 16);
+	intel_de_write_fw(dev_priv, PIPE_CSC_OUTPUT_COEFF_RV_GV(pipe),
+			  coeff[6] << 16 | coeff[7]);
+	intel_de_write_fw(dev_priv, PIPE_CSC_OUTPUT_COEFF_BV(pipe),
+			  coeff[8] << 16);
 
-	intel_de_write(dev_priv, PIPE_CSC_OUTPUT_POSTOFF_HI(pipe), postoff[0]);
-	intel_de_write(dev_priv, PIPE_CSC_OUTPUT_POSTOFF_ME(pipe), postoff[1]);
-	intel_de_write(dev_priv, PIPE_CSC_OUTPUT_POSTOFF_LO(pipe), postoff[2]);
+	intel_de_write_fw(dev_priv, PIPE_CSC_OUTPUT_POSTOFF_HI(pipe), postoff[0]);
+	intel_de_write_fw(dev_priv, PIPE_CSC_OUTPUT_POSTOFF_ME(pipe), postoff[1]);
+	intel_de_write_fw(dev_priv, PIPE_CSC_OUTPUT_POSTOFF_LO(pipe), postoff[2]);
 }
 
 static bool ilk_csc_limited_range(const struct intel_crtc_state *crtc_state)
@@ -319,8 +338,8 @@ static void ilk_load_csc_matrix(const struct intel_crtc_state *crtc_state)
 				    ilk_csc_off_zero);
 	}
 
-	intel_de_write(dev_priv, PIPE_CSC_MODE(crtc->pipe),
-		       crtc_state->csc_mode);
+	intel_de_write_fw(dev_priv, PIPE_CSC_MODE(crtc->pipe),
+			  crtc_state->csc_mode);
 }
 
 static void icl_load_csc_matrix(const struct intel_crtc_state *crtc_state)
@@ -346,8 +365,8 @@ static void icl_load_csc_matrix(const struct intel_crtc_state *crtc_state)
 				      ilk_csc_postoff_limited_range);
 	}
 
-	intel_de_write(dev_priv, PIPE_CSC_MODE(crtc->pipe),
-		       crtc_state->csc_mode);
+	intel_de_write_fw(dev_priv, PIPE_CSC_MODE(crtc->pipe),
+			  crtc_state->csc_mode);
 }
 
 static void chv_load_cgm_csc(struct intel_crtc *crtc,
@@ -377,16 +396,16 @@ static void chv_load_cgm_csc(struct intel_crtc *crtc,
 		coeffs[i] |= (abs_coeff >> 20) & 0xfff;
 	}
 
-	intel_de_write(dev_priv, CGM_PIPE_CSC_COEFF01(pipe),
-		       coeffs[1] << 16 | coeffs[0]);
-	intel_de_write(dev_priv, CGM_PIPE_CSC_COEFF23(pipe),
-		       coeffs[3] << 16 | coeffs[2]);
-	intel_de_write(dev_priv, CGM_PIPE_CSC_COEFF45(pipe),
-		       coeffs[5] << 16 | coeffs[4]);
-	intel_de_write(dev_priv, CGM_PIPE_CSC_COEFF67(pipe),
-		       coeffs[7] << 16 | coeffs[6]);
-	intel_de_write(dev_priv, CGM_PIPE_CSC_COEFF8(pipe),
-		       coeffs[8]);
+	intel_de_write_fw(dev_priv, CGM_PIPE_CSC_COEFF01(pipe),
+			  coeffs[1] << 16 | coeffs[0]);
+	intel_de_write_fw(dev_priv, CGM_PIPE_CSC_COEFF23(pipe),
+			  coeffs[3] << 16 | coeffs[2]);
+	intel_de_write_fw(dev_priv, CGM_PIPE_CSC_COEFF45(pipe),
+			  coeffs[5] << 16 | coeffs[4]);
+	intel_de_write_fw(dev_priv, CGM_PIPE_CSC_COEFF67(pipe),
+			  coeffs[7] << 16 | coeffs[6]);
+	intel_de_write_fw(dev_priv, CGM_PIPE_CSC_COEFF8(pipe),
+			  coeffs[8]);
 }
 
 /* convert hw value with given bit_precision to lut property val */
diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c
index 354b08d6f81d08e5f83c088cf13bdfbd1b60aa59..e4260806c2a40b336abe98b66a0ba2011a3bc4bd 100644
--- a/drivers/gpu/drm/i915/display/intel_ddi.c
+++ b/drivers/gpu/drm/i915/display/intel_ddi.c
@@ -2703,6 +2703,7 @@ static void intel_ddi_post_disable(struct intel_atomic_state *state,
 	struct intel_digital_port *dig_port = enc_to_dig_port(encoder);
 	enum phy phy = intel_port_to_phy(dev_priv, encoder->port);
 	bool is_tc_port = intel_phy_is_tc(dev_priv, phy);
+	struct intel_crtc *slave_crtc;
 
 	if (!intel_crtc_has_type(old_crtc_state, INTEL_OUTPUT_DP_MST)) {
 		intel_crtc_vblank_off(old_crtc_state);
@@ -2721,9 +2722,8 @@ static void intel_ddi_post_disable(struct intel_atomic_state *state,
 			ilk_pfit_disable(old_crtc_state);
 	}
 
-	if (old_crtc_state->bigjoiner_linked_crtc) {
-		struct intel_crtc *slave_crtc =
-			old_crtc_state->bigjoiner_linked_crtc;
+	for_each_intel_crtc_in_pipe_mask(&dev_priv->drm, slave_crtc,
+					 intel_crtc_bigjoiner_slave_pipes(old_crtc_state)) {
 		const struct intel_crtc_state *old_slave_crtc_state =
 			intel_atomic_get_old_crtc_state(state, slave_crtc);
 
@@ -2926,7 +2926,7 @@ static void intel_enable_ddi(struct intel_atomic_state *state,
 {
 	drm_WARN_ON(state->base.dev, crtc_state->has_pch_encoder);
 
-	if (!crtc_state->bigjoiner_slave)
+	if (!intel_crtc_is_bigjoiner_slave(crtc_state))
 		intel_ddi_enable_transcoder_func(encoder, crtc_state);
 
 	intel_vrr_enable(encoder, crtc_state);
@@ -3041,6 +3041,7 @@ intel_ddi_update_prepare(struct intel_atomic_state *state,
 			 struct intel_encoder *encoder,
 			 struct intel_crtc *crtc)
 {
+	struct drm_i915_private *i915 = to_i915(state->base.dev);
 	struct intel_crtc_state *crtc_state =
 		crtc ? intel_atomic_get_new_crtc_state(state, crtc) : NULL;
 	int required_lanes = crtc_state ? crtc_state->lane_count : 1;
@@ -3050,11 +3051,12 @@ intel_ddi_update_prepare(struct intel_atomic_state *state,
 	intel_tc_port_get_link(enc_to_dig_port(encoder),
 		               required_lanes);
 	if (crtc_state && crtc_state->hw.active) {
-		struct intel_crtc *slave_crtc = crtc_state->bigjoiner_linked_crtc;
+		struct intel_crtc *slave_crtc;
 
 		intel_update_active_dpll(state, crtc, encoder);
 
-		if (slave_crtc)
+		for_each_intel_crtc_in_pipe_mask(&i915->drm, slave_crtc,
+						 intel_crtc_bigjoiner_slave_pipes(crtc_state))
 			intel_update_active_dpll(state, slave_crtc, encoder);
 	}
 }
@@ -3099,10 +3101,23 @@ intel_ddi_pre_pll_enable(struct intel_atomic_state *state,
 						crtc_state->lane_lat_optim_mask);
 }
 
+static void adlp_tbt_to_dp_alt_switch_wa(struct intel_encoder *encoder)
+{
+	struct drm_i915_private *i915 = to_i915(encoder->base.dev);
+	enum tc_port tc_port = intel_port_to_tc(i915, encoder->port);
+	int ln;
+
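+	/*
+	 * Select each DKL PHY lane in turn via HIP_INDEX and deassert
+	 * its PCS soft reset; judging by the function name, this is
+	 * required when a TC port switches from TBT-alt to DP-alt mode.
+	 */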
+	for (ln = 0; ln < 2; ln++) {
+		intel_de_write(i915, HIP_INDEX_REG(tc_port), HIP_INDEX_VAL(tc_port, ln));
+		intel_de_rmw(i915, DKL_PCS_DW5(tc_port), DKL_PCS_DW5_CORE_SOFTRESET, 0);
+	}
+}
+
 static void intel_ddi_prepare_link_retrain(struct intel_dp *intel_dp,
 					   const struct intel_crtc_state *crtc_state)
 {
-	struct intel_encoder *encoder = &dp_to_dig_port(intel_dp)->base;
+	struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp);
+	struct intel_encoder *encoder = &dig_port->base;
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
 	enum port port = encoder->port;
 	u32 dp_tp_ctl, ddi_buf_ctl;
@@ -3138,6 +3153,10 @@ static void intel_ddi_prepare_link_retrain(struct intel_dp *intel_dp,
 	intel_de_write(dev_priv, dp_tp_ctl_reg(encoder, crtc_state), dp_tp_ctl);
 	intel_de_posting_read(dev_priv, dp_tp_ctl_reg(encoder, crtc_state));
 
+	if (IS_ALDERLAKE_P(dev_priv) &&
+	    (intel_tc_port_in_dp_alt_mode(dig_port) || intel_tc_port_in_legacy_mode(dig_port)))
+		adlp_tbt_to_dp_alt_switch_wa(encoder);
+
 	intel_dp->DP |= DDI_BUF_CTL_ENABLE;
 	intel_de_write(dev_priv, DDI_BUF_CTL(port), intel_dp->DP);
 	intel_de_posting_read(dev_priv, DDI_BUF_CTL(port));
diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c
index 7f512f9e9e5c7536b44f8803213ec53fee81aef0..80b19c304c4325179e55b5f8c96bf13a3b2459be 100644
--- a/drivers/gpu/drm/i915/display/intel_display.c
+++ b/drivers/gpu/drm/i915/display/intel_display.c
@@ -74,6 +74,7 @@
 
 #include "g4x_dp.h"
 #include "g4x_hdmi.h"
+#include "hsw_ips.h"
 #include "i915_drv.h"
 #include "icl_dsi.h"
 #include "intel_acpi.h"
@@ -112,9 +113,10 @@
 #include "i9xx_plane.h"
 #include "skl_scaler.h"
 #include "skl_universal_plane.h"
+#include "vlv_dsi.h"
 #include "vlv_dsi_pll.h"
+#include "vlv_dsi_regs.h"
 #include "vlv_sideband.h"
-#include "vlv_dsi.h"
 
 static void intel_set_transcoder_timings(const struct intel_crtc_state *crtc_state);
 static void intel_set_pipe_src_size(const struct intel_crtc_state *crtc_state);
@@ -337,10 +339,38 @@ is_trans_port_sync_mode(const struct intel_crtc_state *crtc_state)
 		is_trans_port_sync_slave(crtc_state);
 }
 
-static struct intel_crtc *intel_master_crtc(const struct intel_crtc_state *crtc_state)
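+/*
+ * The master is by convention the lowest pipe in bigjoiner_pipes
+ * (the hardware requires master pipe < slave pipe), hence ffs().
+ * Illustrative example: pipes B+C joined gives bigjoiner_pipes=0x6,
+ * master PIPE_B and slave set BIT(PIPE_C).
+ */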
+static enum pipe bigjoiner_master_pipe(const struct intel_crtc_state *crtc_state)
+{
+	return ffs(crtc_state->bigjoiner_pipes) - 1;
+}
+
+u8 intel_crtc_bigjoiner_slave_pipes(const struct intel_crtc_state *crtc_state)
+{
+	return crtc_state->bigjoiner_pipes & ~BIT(bigjoiner_master_pipe(crtc_state));
+}
+
+bool intel_crtc_is_bigjoiner_slave(const struct intel_crtc_state *crtc_state)
 {
-	if (crtc_state->bigjoiner_slave)
-		return crtc_state->bigjoiner_linked_crtc;
+	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
+
+	return crtc_state->bigjoiner_pipes &&
+		crtc->pipe != bigjoiner_master_pipe(crtc_state);
+}
+
+bool intel_crtc_is_bigjoiner_master(const struct intel_crtc_state *crtc_state)
+{
+	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
+
+	return crtc_state->bigjoiner_pipes &&
+		crtc->pipe == bigjoiner_master_pipe(crtc_state);
+}
+
+struct intel_crtc *intel_master_crtc(const struct intel_crtc_state *crtc_state)
+{
+	struct drm_i915_private *i915 = to_i915(crtc_state->uapi.crtc->dev);
+
+	if (intel_crtc_is_bigjoiner_slave(crtc_state))
+		return intel_crtc_for_pipe(i915, bigjoiner_master_pipe(crtc_state));
 	else
 		return to_intel_crtc(crtc_state->uapi.crtc);
 }
@@ -752,8 +782,11 @@ void intel_plane_disable_noatomic(struct intel_crtc *crtc,
 	crtc_state->data_rate[plane->id] = 0;
 	crtc_state->min_cdclk[plane->id] = 0;
 
-	if (plane->id == PLANE_PRIMARY)
-		hsw_disable_ips(crtc_state);
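+	/*
+	 * Turn IPS off when the last non-cursor plane goes away;
+	 * hsw_ips_disable() is assumed to return whether IPS was
+	 * actually on, in which case a vblank must pass before the
+	 * plane itself can be disabled.
+	 */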
+	if ((crtc_state->active_planes & ~BIT(PLANE_CURSOR)) == 0 &&
+	    hsw_ips_disable(crtc_state)) {
+		crtc_state->ips_enabled = false;
+		intel_crtc_wait_for_next_vblank(crtc);
+	}
 
 	/*
 	 * Vblank time updates from the shadow to live plane control register
@@ -1090,72 +1123,6 @@ static void ilk_pfit_enable(const struct intel_crtc_state *crtc_state)
 	intel_de_write(dev_priv, PF_WIN_SZ(pipe), width << 16 | height);
 }
 
-void hsw_enable_ips(const struct intel_crtc_state *crtc_state)
-{
-	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
-	struct drm_device *dev = crtc->base.dev;
-	struct drm_i915_private *dev_priv = to_i915(dev);
-
-	if (!crtc_state->ips_enabled)
-		return;
-
-	/*
-	 * We can only enable IPS after we enable a plane and wait for a vblank
-	 * This function is called from post_plane_update, which is run after
-	 * a vblank wait.
-	 */
-	drm_WARN_ON(dev, !(crtc_state->active_planes & ~BIT(PLANE_CURSOR)));
-
-	if (IS_BROADWELL(dev_priv)) {
-		drm_WARN_ON(dev, snb_pcode_write(dev_priv, DISPLAY_IPS_CONTROL,
-						 IPS_ENABLE | IPS_PCODE_CONTROL));
-		/* Quoting Art Runyan: "its not safe to expect any particular
-		 * value in IPS_CTL bit 31 after enabling IPS through the
-		 * mailbox." Moreover, the mailbox may return a bogus state,
-		 * so we need to just enable it and continue on.
-		 */
-	} else {
-		intel_de_write(dev_priv, IPS_CTL, IPS_ENABLE);
-		/* The bit only becomes 1 in the next vblank, so this wait here
-		 * is essentially intel_wait_for_vblank. If we don't have this
-		 * and don't wait for vblanks until the end of crtc_enable, then
-		 * the HW state readout code will complain that the expected
-		 * IPS_CTL value is not the one we read. */
-		if (intel_de_wait_for_set(dev_priv, IPS_CTL, IPS_ENABLE, 50))
-			drm_err(&dev_priv->drm,
-				"Timed out waiting for IPS enable\n");
-	}
-}
-
-void hsw_disable_ips(const struct intel_crtc_state *crtc_state)
-{
-	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
-	struct drm_device *dev = crtc->base.dev;
-	struct drm_i915_private *dev_priv = to_i915(dev);
-
-	if (!crtc_state->ips_enabled)
-		return;
-
-	if (IS_BROADWELL(dev_priv)) {
-		drm_WARN_ON(dev,
-			    snb_pcode_write(dev_priv, DISPLAY_IPS_CONTROL, 0));
-		/*
-		 * Wait for PCODE to finish disabling IPS. The BSpec specified
-		 * 42ms timeout value leads to occasional timeouts so use 100ms
-		 * instead.
-		 */
-		if (intel_de_wait_for_clear(dev_priv, IPS_CTL, IPS_ENABLE, 100))
-			drm_err(&dev_priv->drm,
-				"Timed out waiting for IPS disable\n");
-	} else {
-		intel_de_write(dev_priv, IPS_CTL, 0);
-		intel_de_posting_read(dev_priv, IPS_CTL);
-	}
-
-	/* We need to wait for a vblank before we can disable the plane. */
-	intel_crtc_wait_for_next_vblank(crtc);
-}
-
 static void intel_crtc_dpms_overlay_disable(struct intel_crtc *crtc)
 {
 	if (crtc->overlay)
@@ -1166,67 +1133,6 @@ static void intel_crtc_dpms_overlay_disable(struct intel_crtc *crtc)
 	 */
 }
 
-static bool hsw_pre_update_disable_ips(const struct intel_crtc_state *old_crtc_state,
-				       const struct intel_crtc_state *new_crtc_state)
-{
-	struct intel_crtc *crtc = to_intel_crtc(new_crtc_state->uapi.crtc);
-	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
-
-	if (!old_crtc_state->ips_enabled)
-		return false;
-
-	if (intel_crtc_needs_modeset(new_crtc_state))
-		return true;
-
-	/*
-	 * Workaround : Do not read or write the pipe palette/gamma data while
-	 * GAMMA_MODE is configured for split gamma and IPS_CTL has IPS enabled.
-	 *
-	 * Disable IPS before we program the LUT.
-	 */
-	if (IS_HASWELL(dev_priv) &&
-	    (new_crtc_state->uapi.color_mgmt_changed ||
-	     new_crtc_state->update_pipe) &&
-	    new_crtc_state->gamma_mode == GAMMA_MODE_MODE_SPLIT)
-		return true;
-
-	return !new_crtc_state->ips_enabled;
-}
-
-static bool hsw_post_update_enable_ips(const struct intel_crtc_state *old_crtc_state,
-				       const struct intel_crtc_state *new_crtc_state)
-{
-	struct intel_crtc *crtc = to_intel_crtc(new_crtc_state->uapi.crtc);
-	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
-
-	if (!new_crtc_state->ips_enabled)
-		return false;
-
-	if (intel_crtc_needs_modeset(new_crtc_state))
-		return true;
-
-	/*
-	 * Workaround : Do not read or write the pipe palette/gamma data while
-	 * GAMMA_MODE is configured for split gamma and IPS_CTL has IPS enabled.
-	 *
-	 * Re-enable IPS after the LUT has been programmed.
-	 */
-	if (IS_HASWELL(dev_priv) &&
-	    (new_crtc_state->uapi.color_mgmt_changed ||
-	     new_crtc_state->update_pipe) &&
-	    new_crtc_state->gamma_mode == GAMMA_MODE_MODE_SPLIT)
-		return true;
-
-	/*
-	 * We can't read out IPS on broadwell, assume the worst and
-	 * forcibly enable IPS on the first fastset.
-	 */
-	if (new_crtc_state->update_pipe && old_crtc_state->inherited)
-		return true;
-
-	return !old_crtc_state->ips_enabled;
-}
-
 static bool needs_nv12_wa(const struct intel_crtc_state *crtc_state)
 {
 	struct drm_i915_private *dev_priv = to_i915(crtc_state->uapi.crtc->dev);
@@ -1321,9 +1227,7 @@ static void intel_post_plane_update(struct intel_atomic_state *state,
 	if (new_crtc_state->update_wm_post && new_crtc_state->hw.active)
 		intel_update_watermarks(dev_priv);
 
-	if (hsw_post_update_enable_ips(old_crtc_state, new_crtc_state))
-		hsw_enable_ips(new_crtc_state);
-
+	hsw_ips_post_update(state, crtc);
 	intel_fbc_post_update(state, crtc);
 	intel_drrs_page_flip(state, crtc);
 
@@ -1426,8 +1330,8 @@ static void intel_pre_plane_update(struct intel_atomic_state *state,
 
 	intel_psr_pre_plane_update(state, crtc);
 
-	if (hsw_pre_update_disable_ips(old_crtc_state, new_crtc_state))
-		hsw_disable_ips(old_crtc_state);
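+	/*
+	 * hsw_ips_pre_update() presumably reports whether it just turned
+	 * IPS off; if so, a vblank must pass before the plane update.
+	 */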
+	if (hsw_ips_pre_update(state, crtc))
+		intel_crtc_wait_for_next_vblank(crtc);
 
 	if (intel_fbc_pre_update(state, crtc))
 		intel_crtc_wait_for_next_vblank(crtc);
@@ -1905,12 +1809,6 @@ static void ilk_crtc_enable(struct intel_atomic_state *state,
 	intel_set_pch_fifo_underrun_reporting(dev_priv, pipe, true);
 }
 
-/* IPS only exists on ULT machines and is tied to pipe A. */
-static bool hsw_crtc_supports_ips(struct intel_crtc *crtc)
-{
-	return HAS_IPS(to_i915(crtc->base.dev)) && crtc->pipe == PIPE_A;
-}
-
 static void glk_pipe_scaler_clock_gating_wa(struct drm_i915_private *dev_priv,
 					    enum pipe pipe, bool apply)
 {
@@ -1974,34 +1872,18 @@ static void hsw_set_frame_start_delay(const struct intel_crtc_state *crtc_state)
 static void icl_ddi_bigjoiner_pre_enable(struct intel_atomic_state *state,
 					 const struct intel_crtc_state *crtc_state)
 {
-	struct intel_crtc_state *master_crtc_state;
-	struct intel_crtc *master_crtc;
-	struct drm_connector_state *conn_state;
-	struct drm_connector *conn;
-	struct intel_encoder *encoder = NULL;
-	int i;
-
-	master_crtc = intel_master_crtc(crtc_state);
-	master_crtc_state = intel_atomic_get_new_crtc_state(state, master_crtc);
-
-	for_each_new_connector_in_state(&state->base, conn, conn_state, i) {
-		if (conn_state->crtc != &master_crtc->base)
-			continue;
-
-		encoder = to_intel_encoder(conn_state->best_encoder);
-		break;
-	}
+	struct intel_crtc *master_crtc = intel_master_crtc(crtc_state);
 
 	/*
 	 * Enable sequence steps 1-7 on bigjoiner master
 	 */
-	if (crtc_state->bigjoiner_slave)
+	if (intel_crtc_is_bigjoiner_slave(crtc_state))
 		intel_encoders_pre_pll_enable(state, master_crtc);
 
 	if (crtc_state->shared_dpll)
 		intel_enable_shared_dpll(crtc_state);
 
-	if (crtc_state->bigjoiner_slave)
+	if (intel_crtc_is_bigjoiner_slave(crtc_state))
 		intel_encoders_pre_enable(state, master_crtc);
 }
 
@@ -2065,7 +1947,8 @@ static void hsw_crtc_enable(struct intel_atomic_state *state,
 	if (DISPLAY_VER(dev_priv) >= 9 || IS_BROADWELL(dev_priv))
 		bdw_set_pipemisc(new_crtc_state);
 
-	if (!new_crtc_state->bigjoiner_slave && !transcoder_is_dsi(cpu_transcoder))
+	if (!intel_crtc_is_bigjoiner_slave(new_crtc_state) &&
+	    !transcoder_is_dsi(cpu_transcoder))
 		hsw_configure_cpu_transcoder(new_crtc_state);
 
 	crtc->active = true;
@@ -2105,7 +1988,7 @@ static void hsw_crtc_enable(struct intel_atomic_state *state,
 		icl_pipe_mbus_enable(crtc, dbuf_state->joined_mbus);
 	}
 
-	if (new_crtc_state->bigjoiner_slave)
+	if (intel_crtc_is_bigjoiner_slave(new_crtc_state))
 		intel_crtc_vblank_on(new_crtc_state);
 
 	intel_encoders_enable(state, crtc);
@@ -2190,7 +2073,7 @@ static void hsw_crtc_disable(struct intel_atomic_state *state,
 	 * FIXME collapse everything to one hook.
 	 * Need care with mst->ddi interactions.
 	 */
-	if (!old_crtc_state->bigjoiner_slave) {
+	if (!intel_crtc_is_bigjoiner_slave(old_crtc_state)) {
 		intel_encoders_disable(state, crtc);
 		intel_encoders_post_disable(state, crtc);
 	}
@@ -2778,77 +2661,6 @@ static void intel_connector_verify_state(struct intel_crtc_state *crtc_state,
 	}
 }
 
-bool hsw_crtc_state_ips_capable(const struct intel_crtc_state *crtc_state)
-{
-	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
-	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
-
-	/* IPS only exists on ULT machines and is tied to pipe A. */
-	if (!hsw_crtc_supports_ips(crtc))
-		return false;
-
-	if (!dev_priv->params.enable_ips)
-		return false;
-
-	if (crtc_state->pipe_bpp > 24)
-		return false;
-
-	/*
-	 * We compare against max which means we must take
-	 * the increased cdclk requirement into account when
-	 * calculating the new cdclk.
-	 *
-	 * Should measure whether using a lower cdclk w/o IPS
-	 */
-	if (IS_BROADWELL(dev_priv) &&
-	    crtc_state->pixel_rate > dev_priv->max_cdclk_freq * 95 / 100)
-		return false;
-
-	return true;
-}
-
-static int hsw_compute_ips_config(struct intel_crtc_state *crtc_state)
-{
-	struct drm_i915_private *dev_priv =
-		to_i915(crtc_state->uapi.crtc->dev);
-	struct intel_atomic_state *state =
-		to_intel_atomic_state(crtc_state->uapi.state);
-
-	crtc_state->ips_enabled = false;
-
-	if (!hsw_crtc_state_ips_capable(crtc_state))
-		return 0;
-
-	/*
-	 * When IPS gets enabled, the pipe CRC changes. Since IPS gets
-	 * enabled and disabled dynamically based on package C states,
-	 * user space can't make reliable use of the CRCs, so let's just
-	 * completely disable it.
-	 */
-	if (crtc_state->crc_enabled)
-		return 0;
-
-	/* IPS should be fine as long as at least one plane is enabled. */
-	if (!(crtc_state->active_planes & ~BIT(PLANE_CURSOR)))
-		return 0;
-
-	if (IS_BROADWELL(dev_priv)) {
-		const struct intel_cdclk_state *cdclk_state;
-
-		cdclk_state = intel_atomic_get_cdclk_state(state);
-		if (IS_ERR(cdclk_state))
-			return PTR_ERR(cdclk_state);
-
-		/* pixel rate mustn't exceed 95% of cdclk with IPS on BDW */
-		if (crtc_state->pixel_rate > cdclk_state->logical.cdclk * 95 / 100)
-			return 0;
-	}
-
-	crtc_state->ips_enabled = true;
-
-	return 0;
-}
-
 static bool intel_crtc_supports_double_wide(const struct intel_crtc *crtc)
 {
 	const struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
@@ -3347,13 +3159,11 @@ static void i9xx_set_pipeconf(const struct intel_crtc_state *crtc_state)
 {
 	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
 	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
-	u32 pipeconf;
-
-	pipeconf = 0;
+	u32 pipeconf = 0;
 
 	/* we keep both pipes enabled on 830 */
 	if (IS_I830(dev_priv))
-		pipeconf |= intel_de_read(dev_priv, PIPECONF(crtc->pipe)) & PIPECONF_ENABLE;
+		pipeconf |= PIPECONF_ENABLE;
 
 	if (crtc_state->double_wide)
 		pipeconf |= PIPECONF_DOUBLE_WIDE;
@@ -4069,19 +3879,20 @@ static bool transcoder_ddi_func_is_enabled(struct drm_i915_private *dev_priv,
 	return tmp & TRANS_DDI_FUNC_ENABLE;
 }
 
-static u8 enabled_bigjoiner_pipes(struct drm_i915_private *dev_priv)
+static void enabled_bigjoiner_pipes(struct drm_i915_private *dev_priv,
+				    u8 *master_pipes, u8 *slave_pipes)
 {
-	u8 master_pipes = 0, slave_pipes = 0;
 	struct intel_crtc *crtc;
 
-	for_each_intel_crtc(&dev_priv->drm, crtc) {
+	*master_pipes = 0;
+	*slave_pipes = 0;
+
+	for_each_intel_crtc_in_pipe_mask(&dev_priv->drm, crtc,
+					 bigjoiner_pipes(dev_priv)) {
 		enum intel_display_power_domain power_domain;
 		enum pipe pipe = crtc->pipe;
 		intel_wakeref_t wakeref;
 
-		if ((bigjoiner_pipes(dev_priv) & BIT(pipe)) == 0)
-			continue;
-
 		power_domain = intel_dsc_power_domain(crtc, (enum transcoder) pipe);
 		with_intel_display_power_if_enabled(dev_priv, power_domain, wakeref) {
 			u32 tmp = intel_de_read(dev_priv, ICL_PIPE_DSS_CTL1(pipe));
@@ -4090,9 +3901,9 @@ static u8 enabled_bigjoiner_pipes(struct drm_i915_private *dev_priv)
 				continue;
 
 			if (tmp & MASTER_BIG_JOINER_ENABLE)
-				master_pipes |= BIT(pipe);
+				*master_pipes |= BIT(pipe);
 			else
-				slave_pipes |= BIT(pipe);
+				*slave_pipes |= BIT(pipe);
 		}
 
 		if (DISPLAY_VER(dev_priv) < 13)
@@ -4103,18 +3914,47 @@ static u8 enabled_bigjoiner_pipes(struct drm_i915_private *dev_priv)
 			u32 tmp = intel_de_read(dev_priv, ICL_PIPE_DSS_CTL1(pipe));
 
 			if (tmp & UNCOMPRESSED_JOINER_MASTER)
-				master_pipes |= BIT(pipe);
+				*master_pipes |= BIT(pipe);
 			if (tmp & UNCOMPRESSED_JOINER_SLAVE)
-				slave_pipes |= BIT(pipe);
+				*slave_pipes |= BIT(pipe);
 		}
 	}
 
 	/* Bigjoiner pipes should always be consecutive master and slave */
-	drm_WARN(&dev_priv->drm, slave_pipes != master_pipes << 1,
+	drm_WARN(&dev_priv->drm, *slave_pipes != *master_pipes << 1,
 		 "Bigjoiner misconfigured (master pipes 0x%x, slave pipes 0x%x)\n",
-		 master_pipes, slave_pipes);
+		 *master_pipes, *slave_pipes);
+}
 
-	return slave_pipes;
+static enum pipe get_bigjoiner_master_pipe(enum pipe pipe, u8 master_pipes, u8 slave_pipes)
+{
+	if ((slave_pipes & BIT(pipe)) == 0)
+		return pipe;
+
+	/* ignore our pipe and everything above it */
+	master_pipes &= ~GENMASK(7, pipe);
+
+	/* highest remaining bit should be our master pipe */
+	return fls(master_pipes) - 1;
+}
+
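+/*
+ * Worked example (illustrative): with master_pipes=0x5 (A, C) and
+ * slave_pipes=0xa (B, D), i.e. A+B and C+D joined, pipe D resolves
+ * to master pipe C above; here masking off C and below leaves no
+ * higher master, so the BIT(7) sentinel bounds the scan and the
+ * result is slave_pipes & GENMASK(6, PIPE_C) = BIT(PIPE_D).
+ */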
+static u8 get_bigjoiner_slave_pipes(enum pipe pipe, u8 master_pipes, u8 slave_pipes)
+{
+	enum pipe master_pipe, next_master_pipe;
+
+	master_pipe = get_bigjoiner_master_pipe(pipe, master_pipes, slave_pipes);
+
+	if ((master_pipes & BIT(master_pipe)) == 0)
+		return 0;
+
+	/* ignore our master pipe and everything below it */
+	master_pipes &= ~GENMASK(master_pipe, 0);
+	/* make sure a high bit is set for the ffs() */
+	master_pipes |= BIT(7);
+	/* lowest remaining bit should be the next master pipe */
+	next_master_pipe = ffs(master_pipes) - 1;
+
+	return slave_pipes & GENMASK(next_master_pipe - 1, master_pipe);
 }
 
 static u8 hsw_panel_transcoders(struct drm_i915_private *i915)
@@ -4133,6 +3973,7 @@ static u8 hsw_enabled_transcoders(struct intel_crtc *crtc)
 	struct drm_i915_private *dev_priv = to_i915(dev);
 	u8 panel_transcoder_mask = hsw_panel_transcoders(dev_priv);
 	enum transcoder cpu_transcoder;
+	u8 master_pipes, slave_pipes;
 	u8 enabled_transcoders = 0;
 
 	/*
@@ -4184,8 +4025,10 @@ static u8 hsw_enabled_transcoders(struct intel_crtc *crtc)
 		enabled_transcoders |= BIT(cpu_transcoder);
 
 	/* bigjoiner slave -> consider the master pipe's transcoder as well */
-	if (enabled_bigjoiner_pipes(dev_priv) & BIT(crtc->pipe)) {
-		cpu_transcoder = (enum transcoder) crtc->pipe - 1;
+	enabled_bigjoiner_pipes(dev_priv, &master_pipes, &slave_pipes);
+	if (slave_pipes & BIT(crtc->pipe)) {
+		cpu_transcoder = (enum transcoder)
+			get_bigjoiner_master_pipe(crtc->pipe, master_pipes, slave_pipes);
 		if (transcoder_ddi_func_is_enabled(dev_priv, cpu_transcoder))
 			enabled_transcoders |= BIT(cpu_transcoder);
 	}
@@ -4310,6 +4153,24 @@ static bool bxt_get_dsi_transcoder_state(struct intel_crtc *crtc,
 	return transcoder_is_dsi(pipe_config->cpu_transcoder);
 }
 
+static void intel_bigjoiner_get_config(struct intel_crtc_state *crtc_state)
+{
+	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
+	struct drm_i915_private *i915 = to_i915(crtc->base.dev);
+	u8 master_pipes, slave_pipes;
+	enum pipe pipe = crtc->pipe;
+
+	enabled_bigjoiner_pipes(i915, &master_pipes, &slave_pipes);
+
+	if (((master_pipes | slave_pipes) & BIT(pipe)) == 0)
+		return;
+
+	crtc_state->bigjoiner = true;
+	crtc_state->bigjoiner_pipes =
+		BIT(get_bigjoiner_master_pipe(pipe, master_pipes, slave_pipes)) |
+		get_bigjoiner_slave_pipes(pipe, master_pipes, slave_pipes);
+}
+
 static bool hsw_get_pipe_config(struct intel_crtc *crtc,
 				struct intel_crtc_state *pipe_config)
 {
@@ -4336,8 +4197,7 @@ static bool hsw_get_pipe_config(struct intel_crtc *crtc,
 		goto out;
 
 	intel_dsc_get_config(pipe_config);
-	if (DISPLAY_VER(dev_priv) >= 13 && !pipe_config->dsc.compression_enable)
-		intel_uncompressed_joiner_get_config(pipe_config);
+	intel_bigjoiner_get_config(pipe_config);
 
 	if (!transcoder_is_dsi(pipe_config->cpu_transcoder) ||
 	    DISPLAY_VER(dev_priv) >= 11)
@@ -4395,19 +4255,7 @@ static bool hsw_get_pipe_config(struct intel_crtc *crtc,
 			ilk_get_pfit_config(pipe_config);
 	}
 
-	if (hsw_crtc_supports_ips(crtc)) {
-		if (IS_HASWELL(dev_priv))
-			pipe_config->ips_enabled = intel_de_read(dev_priv,
-								 IPS_CTL) & IPS_ENABLE;
-		else {
-			/*
-			 * We cannot readout IPS state on broadwell, set to
-			 * true so we can set it to a defined state on first
-			 * commit.
-			 */
-			pipe_config->ips_enabled = true;
-		}
-	}
+	hsw_ips_get_config(pipe_config);
 
 	if (pipe_config->cpu_transcoder != TRANSCODER_EDP &&
 	    !transcoder_is_dsi(pipe_config->cpu_transcoder)) {
@@ -4819,194 +4667,6 @@ intel_encoder_current_mode(struct intel_encoder *encoder)
 	return mode;
 }
 
-/**
- * intel_wm_need_update - Check whether watermarks need updating
- * @cur: current plane state
- * @new: new plane state
- *
- * Check current plane state versus the new one to determine whether
- * watermarks need to be recalculated.
- *
- * Returns true or false.
- */
-static bool intel_wm_need_update(const struct intel_plane_state *cur,
-				 struct intel_plane_state *new)
-{
-	/* Update watermarks on tiling or size changes. */
-	if (new->uapi.visible != cur->uapi.visible)
-		return true;
-
-	if (!cur->hw.fb || !new->hw.fb)
-		return false;
-
-	if (cur->hw.fb->modifier != new->hw.fb->modifier ||
-	    cur->hw.rotation != new->hw.rotation ||
-	    drm_rect_width(&new->uapi.src) != drm_rect_width(&cur->uapi.src) ||
-	    drm_rect_height(&new->uapi.src) != drm_rect_height(&cur->uapi.src) ||
-	    drm_rect_width(&new->uapi.dst) != drm_rect_width(&cur->uapi.dst) ||
-	    drm_rect_height(&new->uapi.dst) != drm_rect_height(&cur->uapi.dst))
-		return true;
-
-	return false;
-}
-
-static bool needs_scaling(const struct intel_plane_state *state)
-{
-	int src_w = drm_rect_width(&state->uapi.src) >> 16;
-	int src_h = drm_rect_height(&state->uapi.src) >> 16;
-	int dst_w = drm_rect_width(&state->uapi.dst);
-	int dst_h = drm_rect_height(&state->uapi.dst);
-
-	return (src_w != dst_w || src_h != dst_h);
-}
-
-static bool intel_plane_do_async_flip(struct intel_plane *plane,
-				      const struct intel_crtc_state *old_crtc_state,
-				      const struct intel_crtc_state *new_crtc_state)
-{
-	struct drm_i915_private *i915 = to_i915(plane->base.dev);
-
-	if (!plane->async_flip)
-		return false;
-
-	if (!new_crtc_state->uapi.async_flip)
-		return false;
-
-	/*
-	 * In platforms after DISPLAY13, we might need to override
-	 * first async flip in order to change watermark levels
-	 * as part of optimization.
-	 * So for those, we are checking if this is a first async flip.
-	 * For platforms earlier than DISPLAY13 we always do async flip.
-	 */
-	return DISPLAY_VER(i915) < 13 || old_crtc_state->uapi.async_flip;
-}
-
-int intel_plane_atomic_calc_changes(const struct intel_crtc_state *old_crtc_state,
-				    struct intel_crtc_state *new_crtc_state,
-				    const struct intel_plane_state *old_plane_state,
-				    struct intel_plane_state *new_plane_state)
-{
-	struct intel_crtc *crtc = to_intel_crtc(new_crtc_state->uapi.crtc);
-	struct intel_plane *plane = to_intel_plane(new_plane_state->uapi.plane);
-	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
-	bool mode_changed = intel_crtc_needs_modeset(new_crtc_state);
-	bool was_crtc_enabled = old_crtc_state->hw.active;
-	bool is_crtc_enabled = new_crtc_state->hw.active;
-	bool turn_off, turn_on, visible, was_visible;
-	int ret;
-
-	if (DISPLAY_VER(dev_priv) >= 9 && plane->id != PLANE_CURSOR) {
-		ret = skl_update_scaler_plane(new_crtc_state, new_plane_state);
-		if (ret)
-			return ret;
-	}
-
-	was_visible = old_plane_state->uapi.visible;
-	visible = new_plane_state->uapi.visible;
-
-	if (!was_crtc_enabled && drm_WARN_ON(&dev_priv->drm, was_visible))
-		was_visible = false;
-
-	/*
-	 * Visibility is calculated as if the crtc was on, but
-	 * after scaler setup everything depends on it being off
-	 * when the crtc isn't active.
-	 *
-	 * FIXME this is wrong for watermarks. Watermarks should also
-	 * be computed as if the pipe would be active. Perhaps move
-	 * per-plane wm computation to the .check_plane() hook, and
-	 * only combine the results from all planes in the current place?
-	 */
-	if (!is_crtc_enabled) {
-		intel_plane_set_invisible(new_crtc_state, new_plane_state);
-		visible = false;
-	}
-
-	if (!was_visible && !visible)
-		return 0;
-
-	turn_off = was_visible && (!visible || mode_changed);
-	turn_on = visible && (!was_visible || mode_changed);
-
-	drm_dbg_atomic(&dev_priv->drm,
-		       "[CRTC:%d:%s] with [PLANE:%d:%s] visible %i -> %i, off %i, on %i, ms %i\n",
-		       crtc->base.base.id, crtc->base.name,
-		       plane->base.base.id, plane->base.name,
-		       was_visible, visible,
-		       turn_off, turn_on, mode_changed);
-
-	if (turn_on) {
-		if (DISPLAY_VER(dev_priv) < 5 && !IS_G4X(dev_priv))
-			new_crtc_state->update_wm_pre = true;
-
-		/* must disable cxsr around plane enable/disable */
-		if (plane->id != PLANE_CURSOR)
-			new_crtc_state->disable_cxsr = true;
-	} else if (turn_off) {
-		if (DISPLAY_VER(dev_priv) < 5 && !IS_G4X(dev_priv))
-			new_crtc_state->update_wm_post = true;
-
-		/* must disable cxsr around plane enable/disable */
-		if (plane->id != PLANE_CURSOR)
-			new_crtc_state->disable_cxsr = true;
-	} else if (intel_wm_need_update(old_plane_state, new_plane_state)) {
-		if (DISPLAY_VER(dev_priv) < 5 && !IS_G4X(dev_priv)) {
-			/* FIXME bollocks */
-			new_crtc_state->update_wm_pre = true;
-			new_crtc_state->update_wm_post = true;
-		}
-	}
-
-	if (visible || was_visible)
-		new_crtc_state->fb_bits |= plane->frontbuffer_bit;
-
-	/*
-	 * ILK/SNB DVSACNTR/Sprite Enable
-	 * IVB SPR_CTL/Sprite Enable
-	 * "When in Self Refresh Big FIFO mode, a write to enable the
-	 *  plane will be internally buffered and delayed while Big FIFO
-	 *  mode is exiting."
-	 *
-	 * Which means that enabling the sprite can take an extra frame
-	 * when we start in big FIFO mode (LP1+). Thus we need to drop
-	 * down to LP0 and wait for vblank in order to make sure the
-	 * sprite gets enabled on the next vblank after the register write.
-	 * Doing otherwise would risk enabling the sprite one frame after
-	 * we've already signalled flip completion. We can resume LP1+
-	 * once the sprite has been enabled.
-	 *
-	 *
-	 * WaCxSRDisabledForSpriteScaling:ivb
-	 * IVB SPR_SCALE/Scaling Enable
-	 * "Low Power watermarks must be disabled for at least one
-	 *  frame before enabling sprite scaling, and kept disabled
-	 *  until sprite scaling is disabled."
-	 *
-	 * ILK/SNB DVSASCALE/Scaling Enable
-	 * "When in Self Refresh Big FIFO mode, scaling enable will be
-	 *  masked off while Big FIFO mode is exiting."
-	 *
-	 * Despite the w/a only being listed for IVB we assume that
-	 * the ILK/SNB note has similar ramifications, hence we apply
-	 * the w/a on all three platforms.
-	 *
-	 * With experimental results seems this is needed also for primary
-	 * plane, not only sprite plane.
-	 */
-	if (plane->id != PLANE_CURSOR &&
-	    (IS_IRONLAKE(dev_priv) || IS_SANDYBRIDGE(dev_priv) ||
-	     IS_IVYBRIDGE(dev_priv)) &&
-	    (turn_on || (!needs_scaling(old_plane_state) &&
-			 needs_scaling(new_plane_state))))
-		new_crtc_state->disable_lp_wm = true;
-
-	if (intel_plane_do_async_flip(plane, old_crtc_state, new_crtc_state))
-		new_plane_state->do_async_flip = true;
-
-	return 0;
-}
-
 static bool encoders_cloneable(const struct intel_encoder *a,
 			       const struct intel_encoder *b)
 {
@@ -5266,7 +4926,7 @@ static int intel_crtc_atomic_check(struct intel_atomic_state *state,
 
 	if (mode_changed && crtc_state->hw.enable &&
 	    !drm_WARN_ON(&dev_priv->drm, crtc_state->shared_dpll)) {
-		ret = dev_priv->dpll_funcs->crtc_compute_clock(crtc_state);
+		ret = intel_dpll_crtc_compute_clock(crtc_state);
 		if (ret)
 			return ret;
 	}
@@ -5317,7 +4977,7 @@ static int intel_crtc_atomic_check(struct intel_atomic_state *state,
 	}
 
 	if (HAS_IPS(dev_priv)) {
-		ret = hsw_compute_ips_config(crtc_state);
+		ret = hsw_ips_compute_config(state, crtc);
 		if (ret)
 			return ret;
 	}
@@ -5619,9 +5279,10 @@ static void intel_dump_pipe_config(const struct intel_crtc_state *pipe_config,
 		    transcoder_name(pipe_config->master_transcoder),
 		    pipe_config->sync_mode_slaves_mask);
 
-	drm_dbg_kms(&dev_priv->drm, "bigjoiner: %s\n",
-		    pipe_config->bigjoiner_slave ? "slave" :
-		    pipe_config->bigjoiner ? "master" : "no");
+	drm_dbg_kms(&dev_priv->drm, "bigjoiner: %s, pipes: 0x%x\n",
+		    intel_crtc_is_bigjoiner_slave(pipe_config) ? "slave" :
+		    intel_crtc_is_bigjoiner_master(pipe_config) ? "master" : "no",
+		    pipe_config->bigjoiner_pipes);
 
 	drm_dbg_kms(&dev_priv->drm, "splitter: %s, link count %d, overlap %d\n",
 		    enableddisabled(pipe_config->splitter.enable),
@@ -5818,35 +5479,42 @@ static bool check_digital_port_conflicts(struct intel_atomic_state *state)
 
 static void
 intel_crtc_copy_uapi_to_hw_state_nomodeset(struct intel_atomic_state *state,
-					   struct intel_crtc_state *crtc_state)
+					   struct intel_crtc *crtc)
 {
-	const struct intel_crtc_state *master_crtc_state;
-	struct intel_crtc *master_crtc;
+	struct intel_crtc_state *crtc_state =
+		intel_atomic_get_new_crtc_state(state, crtc);
 
-	master_crtc = intel_master_crtc(crtc_state);
-	master_crtc_state = intel_atomic_get_new_crtc_state(state, master_crtc);
+	WARN_ON(intel_crtc_is_bigjoiner_slave(crtc_state));
 
-	/* No need to copy state if the master state is unchanged */
-	if (master_crtc_state)
-		intel_crtc_copy_color_blobs(crtc_state, master_crtc_state);
+	drm_property_replace_blob(&crtc_state->hw.degamma_lut,
+				  crtc_state->uapi.degamma_lut);
+	drm_property_replace_blob(&crtc_state->hw.gamma_lut,
+				  crtc_state->uapi.gamma_lut);
+	drm_property_replace_blob(&crtc_state->hw.ctm,
+				  crtc_state->uapi.ctm);
 }
 
 static void
-intel_crtc_copy_uapi_to_hw_state(struct intel_atomic_state *state,
-				 struct intel_crtc_state *crtc_state)
+intel_crtc_copy_uapi_to_hw_state_modeset(struct intel_atomic_state *state,
+					 struct intel_crtc *crtc)
 {
+	struct intel_crtc_state *crtc_state =
+		intel_atomic_get_new_crtc_state(state, crtc);
+
+	WARN_ON(intel_crtc_is_bigjoiner_slave(crtc_state));
+
 	crtc_state->hw.enable = crtc_state->uapi.enable;
 	crtc_state->hw.active = crtc_state->uapi.active;
 	crtc_state->hw.mode = crtc_state->uapi.mode;
 	crtc_state->hw.adjusted_mode = crtc_state->uapi.adjusted_mode;
 	crtc_state->hw.scaling_filter = crtc_state->uapi.scaling_filter;
 
-	intel_crtc_copy_uapi_to_hw_state_nomodeset(state, crtc_state);
+	intel_crtc_copy_uapi_to_hw_state_nomodeset(state, crtc);
 }
 
 static void intel_crtc_copy_hw_to_uapi_state(struct intel_crtc_state *crtc_state)
 {
-	if (crtc_state->bigjoiner_slave)
+	if (intel_crtc_is_bigjoiner_slave(crtc_state))
 		return;
 
 	crtc_state->uapi.enable = crtc_state->hw.enable;
@@ -5857,7 +5525,6 @@ static void intel_crtc_copy_hw_to_uapi_state(struct intel_crtc_state *crtc_state
 	crtc_state->uapi.adjusted_mode = crtc_state->hw.adjusted_mode;
 	crtc_state->uapi.scaling_filter = crtc_state->hw.scaling_filter;
 
-	/* copy color blobs to uapi */
 	drm_property_replace_blob(&crtc_state->uapi.degamma_lut,
 				  crtc_state->hw.degamma_lut);
 	drm_property_replace_blob(&crtc_state->uapi.gamma_lut,
@@ -5866,51 +5533,79 @@ static void intel_crtc_copy_hw_to_uapi_state(struct intel_crtc_state *crtc_state
 				  crtc_state->hw.ctm);
 }
 
+static void
+copy_bigjoiner_crtc_state_nomodeset(struct intel_atomic_state *state,
+				    struct intel_crtc *slave_crtc)
+{
+	struct intel_crtc_state *slave_crtc_state =
+		intel_atomic_get_new_crtc_state(state, slave_crtc);
+	struct intel_crtc *master_crtc = intel_master_crtc(slave_crtc_state);
+	const struct intel_crtc_state *master_crtc_state =
+		intel_atomic_get_new_crtc_state(state, master_crtc);
+
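+	/* the slave simply mirrors the master's color management state */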
+	drm_property_replace_blob(&slave_crtc_state->hw.degamma_lut,
+				  master_crtc_state->hw.degamma_lut);
+	drm_property_replace_blob(&slave_crtc_state->hw.gamma_lut,
+				  master_crtc_state->hw.gamma_lut);
+	drm_property_replace_blob(&slave_crtc_state->hw.ctm,
+				  master_crtc_state->hw.ctm);
+
+	slave_crtc_state->uapi.color_mgmt_changed = master_crtc_state->uapi.color_mgmt_changed;
+}
+
 static int
-copy_bigjoiner_crtc_state(struct intel_crtc_state *crtc_state,
-			  const struct intel_crtc_state *from_crtc_state)
+copy_bigjoiner_crtc_state_modeset(struct intel_atomic_state *state,
+				  struct intel_crtc *slave_crtc)
 {
+	struct intel_crtc_state *slave_crtc_state =
+		intel_atomic_get_new_crtc_state(state, slave_crtc);
+	struct intel_crtc *master_crtc = intel_master_crtc(slave_crtc_state);
+	const struct intel_crtc_state *master_crtc_state =
+		intel_atomic_get_new_crtc_state(state, master_crtc);
 	struct intel_crtc_state *saved_state;
 
-	saved_state = kmemdup(from_crtc_state, sizeof(*saved_state), GFP_KERNEL);
+	saved_state = kmemdup(master_crtc_state, sizeof(*saved_state), GFP_KERNEL);
 	if (!saved_state)
 		return -ENOMEM;
 
-	saved_state->uapi = crtc_state->uapi;
-	saved_state->scaler_state = crtc_state->scaler_state;
-	saved_state->shared_dpll = crtc_state->shared_dpll;
-	saved_state->dpll_hw_state = crtc_state->dpll_hw_state;
-	saved_state->crc_enabled = crtc_state->crc_enabled;
+	/* preserve some things from the slave's original crtc state */
+	saved_state->uapi = slave_crtc_state->uapi;
+	saved_state->scaler_state = slave_crtc_state->scaler_state;
+	saved_state->shared_dpll = slave_crtc_state->shared_dpll;
+	saved_state->dpll_hw_state = slave_crtc_state->dpll_hw_state;
+	saved_state->crc_enabled = slave_crtc_state->crc_enabled;
 
-	intel_crtc_free_hw_state(crtc_state);
-	memcpy(crtc_state, saved_state, sizeof(*crtc_state));
+	intel_crtc_free_hw_state(slave_crtc_state);
+	memcpy(slave_crtc_state, saved_state, sizeof(*slave_crtc_state));
 	kfree(saved_state);
 
 	/* Re-init hw state */
-	memset(&crtc_state->hw, 0, sizeof(saved_state->hw));
-	crtc_state->hw.enable = from_crtc_state->hw.enable;
-	crtc_state->hw.active = from_crtc_state->hw.active;
-	crtc_state->hw.pipe_mode = from_crtc_state->hw.pipe_mode;
-	crtc_state->hw.adjusted_mode = from_crtc_state->hw.adjusted_mode;
+	memset(&slave_crtc_state->hw, 0, sizeof(slave_crtc_state->hw));
+	slave_crtc_state->hw.enable = master_crtc_state->hw.enable;
+	slave_crtc_state->hw.active = master_crtc_state->hw.active;
+	slave_crtc_state->hw.mode = master_crtc_state->hw.mode;
+	slave_crtc_state->hw.pipe_mode = master_crtc_state->hw.pipe_mode;
+	slave_crtc_state->hw.adjusted_mode = master_crtc_state->hw.adjusted_mode;
+	slave_crtc_state->hw.scaling_filter = master_crtc_state->hw.scaling_filter;
+
+	copy_bigjoiner_crtc_state_nomodeset(state, slave_crtc);
 
 	/* Some fixups */
-	crtc_state->uapi.mode_changed = from_crtc_state->uapi.mode_changed;
-	crtc_state->uapi.connectors_changed = from_crtc_state->uapi.connectors_changed;
-	crtc_state->uapi.active_changed = from_crtc_state->uapi.active_changed;
-	crtc_state->nv12_planes = crtc_state->c8_planes = crtc_state->update_planes = 0;
-	crtc_state->bigjoiner_linked_crtc = to_intel_crtc(from_crtc_state->uapi.crtc);
-	crtc_state->bigjoiner_slave = true;
-	crtc_state->cpu_transcoder = from_crtc_state->cpu_transcoder;
-	crtc_state->has_audio = from_crtc_state->has_audio;
+	slave_crtc_state->uapi.mode_changed = master_crtc_state->uapi.mode_changed;
+	slave_crtc_state->uapi.connectors_changed = master_crtc_state->uapi.connectors_changed;
+	slave_crtc_state->uapi.active_changed = master_crtc_state->uapi.active_changed;
+	slave_crtc_state->cpu_transcoder = master_crtc_state->cpu_transcoder;
+	slave_crtc_state->has_audio = master_crtc_state->has_audio;
 
 	return 0;
 }
 
 static int
 intel_crtc_prepare_cleared_state(struct intel_atomic_state *state,
-				 struct intel_crtc_state *crtc_state)
+				 struct intel_crtc *crtc)
 {
-	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
+	struct intel_crtc_state *crtc_state =
+		intel_atomic_get_new_crtc_state(state, crtc);
 	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
 	struct intel_crtc_state *saved_state;
 
@@ -5940,7 +5635,7 @@ intel_crtc_prepare_cleared_state(struct intel_atomic_state *state,
 	memcpy(crtc_state, saved_state, sizeof(*crtc_state));
 	kfree(saved_state);
 
-	intel_crtc_copy_uapi_to_hw_state(state, crtc_state);
+	intel_crtc_copy_uapi_to_hw_state_modeset(state, crtc);
 
 	return 0;
 }
@@ -6618,6 +6313,7 @@ intel_pipe_config_compare(const struct intel_crtc_state *current_config,
 		PIPE_CONF_CHECK_X(dpll_hw_state.cfgcr1);
 		PIPE_CONF_CHECK_X(dpll_hw_state.cfgcr2);
 		PIPE_CONF_CHECK_X(dpll_hw_state.cfgcr0);
+		PIPE_CONF_CHECK_X(dpll_hw_state.div0);
 		PIPE_CONF_CHECK_X(dpll_hw_state.ebb0);
 		PIPE_CONF_CHECK_X(dpll_hw_state.ebb4);
 		PIPE_CONF_CHECK_X(dpll_hw_state.pll0);
@@ -6669,8 +6365,7 @@ intel_pipe_config_compare(const struct intel_crtc_state *current_config,
 	PIPE_CONF_CHECK_X(sync_mode_slaves_mask);
 	PIPE_CONF_CHECK_I(master_transcoder);
 	PIPE_CONF_CHECK_BOOL(bigjoiner);
-	PIPE_CONF_CHECK_BOOL(bigjoiner_slave);
-	PIPE_CONF_CHECK_P(bigjoiner_linked_crtc);
+	PIPE_CONF_CHECK_X(bigjoiner_pipes);
 
 	PIPE_CONF_CHECK_I(dsc.compression_enable);
 	PIPE_CONF_CHECK_I(dsc.dsc_split);
@@ -7456,20 +7151,25 @@ static int intel_crtc_add_bigjoiner_planes(struct intel_atomic_state *state,
 
 static int intel_bigjoiner_add_affected_planes(struct intel_atomic_state *state)
 {
+	struct drm_i915_private *i915 = to_i915(state->base.dev);
 	const struct intel_crtc_state *crtc_state;
 	struct intel_crtc *crtc;
 	int i;
 
 	for_each_new_intel_crtc_in_state(state, crtc, crtc_state, i) {
-		int ret;
+		struct intel_crtc *other;
 
-		if (!crtc_state->bigjoiner)
-			continue;
+		for_each_intel_crtc_in_pipe_mask(&i915->drm, other,
+						 crtc_state->bigjoiner_pipes) {
+			int ret;
 
-		ret = intel_crtc_add_bigjoiner_planes(state, crtc,
-						      crtc_state->bigjoiner_linked_crtc);
-		if (ret)
-			return ret;
+			if (crtc == other)
+				continue;
+
+			ret = intel_crtc_add_bigjoiner_planes(state, crtc, other);
+			if (ret)
+				return ret;
+		}
 	}
 
 	return 0;
@@ -7571,71 +7271,123 @@ static bool intel_cpu_transcoders_need_modeset(struct intel_atomic_state *state,
 	return false;
 }
 
+static bool intel_pipes_need_modeset(struct intel_atomic_state *state,
+				     u8 pipes)
+{
+	const struct intel_crtc_state *new_crtc_state;
+	struct intel_crtc *crtc;
+	int i;
+
+	for_each_new_intel_crtc_in_state(state, crtc, new_crtc_state, i) {
+		if (new_crtc_state->hw.enable &&
+		    pipes & BIT(crtc->pipe) &&
+		    intel_crtc_needs_modeset(new_crtc_state))
+			return true;
+	}
+
+	return false;
+}
+
 static int intel_atomic_check_bigjoiner(struct intel_atomic_state *state,
-					struct intel_crtc *crtc,
-					struct intel_crtc_state *old_crtc_state,
-					struct intel_crtc_state *new_crtc_state)
+					struct intel_crtc *master_crtc)
 {
 	struct drm_i915_private *i915 = to_i915(state->base.dev);
-	struct intel_crtc_state *slave_crtc_state, *master_crtc_state;
-	struct intel_crtc *slave_crtc, *master_crtc;
+	struct intel_crtc_state *master_crtc_state =
+		intel_atomic_get_new_crtc_state(state, master_crtc);
+	struct intel_crtc *slave_crtc;
+	u8 slave_pipes;
 
-	/* slave being enabled, is master is still claiming this crtc? */
-	if (old_crtc_state->bigjoiner_slave) {
-		slave_crtc = crtc;
-		master_crtc = old_crtc_state->bigjoiner_linked_crtc;
-		master_crtc_state = intel_atomic_get_new_crtc_state(state, master_crtc);
-		if (!master_crtc_state || !intel_crtc_needs_modeset(master_crtc_state))
-			goto claimed;
-	}
+	/*
+	 * TODO: encoder.compute_config() may be the best
+	 * place to populate the bitmask for the master crtc.
+	 * For now encoder.compute_config() just flags things
+	 * as needing bigjoiner and we populate the bitmask
+	 * here.
+	 */
+	WARN_ON(master_crtc_state->bigjoiner_pipes);
 
-	if (!new_crtc_state->bigjoiner)
+	if (!master_crtc_state->bigjoiner)
 		return 0;
 
-	slave_crtc = intel_dsc_get_bigjoiner_secondary(crtc);
-	if (!slave_crtc) {
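+	/*
+	 * The hardware requires the slave to be the pipe immediately
+	 * after the master.
+	 */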
+	slave_pipes = BIT(master_crtc->pipe + 1);
+
+	if (slave_pipes & ~bigjoiner_pipes(i915)) {
 		drm_dbg_kms(&i915->drm,
-			    "[CRTC:%d:%s] Big joiner configuration requires "
-			    "CRTC + 1 to be used, doesn't exist\n",
-			    crtc->base.base.id, crtc->base.name);
+			    "[CRTC:%d:%s] Cannot act as big joiner master "
+			    "(need 0x%x as slave pipes, only 0x%x possible)\n",
+			    master_crtc->base.base.id, master_crtc->base.name,
+			    slave_pipes, bigjoiner_pipes(i915));
 		return -EINVAL;
 	}
 
-	new_crtc_state->bigjoiner_linked_crtc = slave_crtc;
-	slave_crtc_state = intel_atomic_get_crtc_state(&state->base, slave_crtc);
-	master_crtc = crtc;
-	if (IS_ERR(slave_crtc_state))
-		return PTR_ERR(slave_crtc_state);
+	for_each_intel_crtc_in_pipe_mask(&i915->drm, slave_crtc, slave_pipes) {
+		struct intel_crtc_state *slave_crtc_state;
+		int ret;
 
-	/* master being enabled, slave was already configured? */
-	if (slave_crtc_state->uapi.enable)
-		goto claimed;
+		slave_crtc_state = intel_atomic_get_crtc_state(&state->base, slave_crtc);
+		if (IS_ERR(slave_crtc_state))
+			return PTR_ERR(slave_crtc_state);
 
-	drm_dbg_kms(&i915->drm,
-		    "[CRTC:%d:%s] Used as slave for big joiner\n",
-		    slave_crtc->base.base.id, slave_crtc->base.name);
+		/* master being enabled, slave was already configured? */
+		if (slave_crtc_state->uapi.enable) {
+			drm_dbg_kms(&i915->drm,
+				    "[CRTC:%d:%s] Slave is enabled as normal CRTC, but "
+				    "[CRTC:%d:%s] claiming this CRTC for bigjoiner.\n",
+				    "[CRTC:%d:%s] is claiming this CRTC for bigjoiner.\n",
+				    master_crtc->base.base.id, master_crtc->base.name);
+			return -EINVAL;
+		}
 
-	return copy_bigjoiner_crtc_state(slave_crtc_state, new_crtc_state);
+		/*
+		 * The state copy logic assumes the master crtc gets processed
+		 * before the slave crtc during the main compute_config loop.
+		 * This works because the crtcs are created in pipe order,
+		 * and the hardware requires master pipe < slave pipe as well.
+		 * Should that change, we need to rethink the logic.
+		 */
+		if (WARN_ON(drm_crtc_index(&master_crtc->base) >
+			    drm_crtc_index(&slave_crtc->base)))
+			return -EINVAL;
 
-claimed:
-	drm_dbg_kms(&i915->drm,
-		    "[CRTC:%d:%s] Slave is enabled as normal CRTC, but "
-		    "[CRTC:%d:%s] claiming this CRTC for bigjoiner.\n",
-		    slave_crtc->base.base.id, slave_crtc->base.name,
-		    master_crtc->base.base.id, master_crtc->base.name);
-	return -EINVAL;
+		drm_dbg_kms(&i915->drm,
+			    "[CRTC:%d:%s] Used as slave for big joiner master [CRTC:%d:%s]\n",
+			    slave_crtc->base.base.id, slave_crtc->base.name,
+			    master_crtc->base.base.id, master_crtc->base.name);
+
+		master_crtc_state->bigjoiner_pipes =
+			BIT(master_crtc->pipe) | BIT(slave_crtc->pipe);
+		slave_crtc_state->bigjoiner_pipes =
+			BIT(master_crtc->pipe) | BIT(slave_crtc->pipe);
+
+		ret = copy_bigjoiner_crtc_state_modeset(state, slave_crtc);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
 }
 
 static void kill_bigjoiner_slave(struct intel_atomic_state *state,
-				 struct intel_crtc_state *master_crtc_state)
+				 struct intel_crtc *master_crtc)
 {
-	struct intel_crtc_state *slave_crtc_state =
-		intel_atomic_get_new_crtc_state(state, master_crtc_state->bigjoiner_linked_crtc);
+	struct drm_i915_private *i915 = to_i915(state->base.dev);
+	struct intel_crtc_state *master_crtc_state =
+		intel_atomic_get_new_crtc_state(state, master_crtc);
+	struct intel_crtc *slave_crtc;
+
+	for_each_intel_crtc_in_pipe_mask(&i915->drm, slave_crtc,
+					 intel_crtc_bigjoiner_slave_pipes(master_crtc_state)) {
+		struct intel_crtc_state *slave_crtc_state =
+			intel_atomic_get_new_crtc_state(state, slave_crtc);
 
-	slave_crtc_state->bigjoiner = master_crtc_state->bigjoiner = false;
-	slave_crtc_state->bigjoiner_slave = master_crtc_state->bigjoiner_slave = false;
-	slave_crtc_state->bigjoiner_linked_crtc = master_crtc_state->bigjoiner_linked_crtc = NULL;
-	intel_crtc_copy_uapi_to_hw_state(state, slave_crtc_state);
+		slave_crtc_state->bigjoiner = false;
+		slave_crtc_state->bigjoiner_pipes = 0;
+
+		intel_crtc_copy_uapi_to_hw_state_modeset(state, slave_crtc);
+	}
+
+	master_crtc_state->bigjoiner = false;
+	master_crtc_state->bigjoiner_pipes = 0;
 }
 
 /**
@@ -7785,34 +7537,37 @@ static int intel_atomic_check_async(struct intel_atomic_state *state, struct int
 
 static int intel_bigjoiner_add_affected_crtcs(struct intel_atomic_state *state)
 {
+	struct drm_i915_private *i915 = to_i915(state->base.dev);
 	struct intel_crtc_state *crtc_state;
 	struct intel_crtc *crtc;
+	u8 affected_pipes = 0;
+	u8 modeset_pipes = 0;
 	int i;
 
 	for_each_new_intel_crtc_in_state(state, crtc, crtc_state, i) {
-		struct intel_crtc_state *linked_crtc_state;
-		struct intel_crtc *linked_crtc;
-		int ret;
+		affected_pipes |= crtc_state->bigjoiner_pipes;
+		if (intel_crtc_needs_modeset(crtc_state))
+			modeset_pipes |= crtc_state->bigjoiner_pipes;
+	}
 
-		if (!crtc_state->bigjoiner)
-			continue;
+	for_each_intel_crtc_in_pipe_mask(&i915->drm, crtc, affected_pipes) {
+		crtc_state = intel_atomic_get_crtc_state(&state->base, crtc);
+		if (IS_ERR(crtc_state))
+			return PTR_ERR(crtc_state);
+	}
 
-		linked_crtc = crtc_state->bigjoiner_linked_crtc;
-		linked_crtc_state = intel_atomic_get_crtc_state(&state->base, linked_crtc);
-		if (IS_ERR(linked_crtc_state))
-			return PTR_ERR(linked_crtc_state);
+	for_each_intel_crtc_in_pipe_mask(&i915->drm, crtc, modeset_pipes) {
+		int ret;
 
-		if (!intel_crtc_needs_modeset(crtc_state))
-			continue;
+		crtc_state = intel_atomic_get_new_crtc_state(state, crtc);
 
-		linked_crtc_state->uapi.mode_changed = true;
+		crtc_state->uapi.mode_changed = true;
 
-		ret = drm_atomic_add_affected_connectors(&state->base,
-							 &linked_crtc->base);
+		ret = drm_atomic_add_affected_connectors(&state->base, &crtc->base);
 		if (ret)
 			return ret;
 
-		ret = intel_atomic_add_affected_planes(state, linked_crtc);
+		ret = intel_atomic_add_affected_planes(state, crtc);
 		if (ret)
 			return ret;
 	}
@@ -7820,8 +7575,8 @@ static int intel_bigjoiner_add_affected_crtcs(struct intel_atomic_state *state)
 	for_each_new_intel_crtc_in_state(state, crtc, crtc_state, i) {
 		/* Kill old bigjoiner link, we may re-establish afterwards */
 		if (intel_crtc_needs_modeset(crtc_state) &&
-		    crtc_state->bigjoiner && !crtc_state->bigjoiner_slave)
-			kill_bigjoiner_slave(state, crtc_state);
+		    intel_crtc_is_bigjoiner_master(crtc_state))
+			kill_bigjoiner_slave(state, crtc);
 	}
 
 	return 0;
@@ -7846,6 +7601,10 @@ static int intel_atomic_check(struct drm_device *dev,
 					    new_crtc_state, i) {
 		if (new_crtc_state->inherited != old_crtc_state->inherited)
 			new_crtc_state->uapi.mode_changed = true;
+
+		if (new_crtc_state->uapi.scaling_filter !=
+		    old_crtc_state->uapi.scaling_filter)
+			new_crtc_state->uapi.mode_changed = true;
 	}
 
 	intel_vrr_check_modeset(state);
@@ -7861,30 +7620,30 @@ static int intel_atomic_check(struct drm_device *dev,
 	for_each_oldnew_intel_crtc_in_state(state, crtc, old_crtc_state,
 					    new_crtc_state, i) {
 		if (!intel_crtc_needs_modeset(new_crtc_state)) {
-			/* Light copy */
-			intel_crtc_copy_uapi_to_hw_state_nomodeset(state, new_crtc_state);
-
+			if (intel_crtc_is_bigjoiner_slave(new_crtc_state))
+				copy_bigjoiner_crtc_state_nomodeset(state, crtc);
+			else
+				intel_crtc_copy_uapi_to_hw_state_nomodeset(state, crtc);
 			continue;
 		}
 
-		if (!new_crtc_state->uapi.enable) {
-			if (!new_crtc_state->bigjoiner_slave) {
-				intel_crtc_copy_uapi_to_hw_state(state, new_crtc_state);
-				any_ms = true;
-			}
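+		/*
+		 * Bigjoiner slave state is computed entirely from the
+		 * master's state in intel_atomic_check_bigjoiner(), so
+		 * there is nothing more to do here; a slave must never
+		 * be enabled via its own uapi state.
+		 */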
+		if (intel_crtc_is_bigjoiner_slave(new_crtc_state)) {
+			drm_WARN_ON(&dev_priv->drm, new_crtc_state->uapi.enable);
 			continue;
 		}
 
-		ret = intel_crtc_prepare_cleared_state(state, new_crtc_state);
+		ret = intel_crtc_prepare_cleared_state(state, crtc);
 		if (ret)
 			goto fail;
 
+		if (!new_crtc_state->hw.enable)
+			continue;
+
 		ret = intel_modeset_pipe_config(state, new_crtc_state);
 		if (ret)
 			goto fail;
 
-		ret = intel_atomic_check_bigjoiner(state, crtc, old_crtc_state,
-						   new_crtc_state);
+		ret = intel_atomic_check_bigjoiner(state, crtc);
 		if (ret)
 			goto fail;
 	}
@@ -7938,10 +7697,7 @@ static int intel_atomic_check(struct drm_device *dev,
 		}
 
 		if (new_crtc_state->bigjoiner) {
-			struct intel_crtc_state *linked_crtc_state =
-				intel_atomic_get_new_crtc_state(state, new_crtc_state->bigjoiner_linked_crtc);
-
-			if (intel_crtc_needs_modeset(linked_crtc_state)) {
+			if (intel_pipes_need_modeset(state, new_crtc_state->bigjoiner_pipes)) {
 				new_crtc_state->uapi.mode_changed = true;
 				new_crtc_state->update_pipe = false;
 			}
@@ -8121,9 +7877,6 @@ static void intel_pipe_fastset(const struct intel_crtc_state *old_crtc_state,
 	if (DISPLAY_VER(dev_priv) >= 9 ||
 	    IS_BROADWELL(dev_priv) || IS_HASWELL(dev_priv))
 		hsw_set_linetime_wm(new_crtc_state);
-
-	if (DISPLAY_VER(dev_priv) >= 11)
-		icl_set_pipe_chicken(new_crtc_state);
 }
 
 static void commit_pipe_pre_planes(struct intel_atomic_state *state,
@@ -8188,7 +7941,7 @@ static void intel_enable_crtc(struct intel_atomic_state *state,
 
 	dev_priv->display->crtc_enable(state, crtc);
 
-	if (new_crtc_state->bigjoiner_slave)
+	if (intel_crtc_is_bigjoiner_slave(new_crtc_state))
 		return;
 
 	/* vblanks work again, re-enable pipe CRC. */
@@ -8198,7 +7951,7 @@ static void intel_enable_crtc(struct intel_atomic_state *state,
 static void intel_update_crtc(struct intel_atomic_state *state,
 			      struct intel_crtc *crtc)
 {
-	struct drm_i915_private *dev_priv = to_i915(state->base.dev);
+	struct drm_i915_private *i915 = to_i915(state->base.dev);
 	const struct intel_crtc_state *old_crtc_state =
 		intel_atomic_get_old_crtc_state(state, crtc);
 	struct intel_crtc_state *new_crtc_state =
@@ -8215,21 +7968,22 @@ static void intel_update_crtc(struct intel_atomic_state *state,
 
 		if (new_crtc_state->update_pipe)
 			intel_encoders_update_pipe(state, crtc);
+
+		if (DISPLAY_VER(i915) >= 11 &&
+		    new_crtc_state->update_pipe)
+			icl_set_pipe_chicken(new_crtc_state);
 	}
 
 	intel_fbc_update(state, crtc);
 
-	intel_update_planes_on_crtc(state, crtc);
+	intel_crtc_planes_update_noarm(state, crtc);
 
 	/* Perform vblank evasion around commit operation */
 	intel_pipe_update_start(new_crtc_state);
 
 	commit_pipe_pre_planes(state, crtc);
 
-	if (DISPLAY_VER(dev_priv) >= 9)
-		skl_arm_planes_on_crtc(state, crtc);
-	else
-		i9xx_arm_planes_on_crtc(state, crtc);
+	intel_crtc_planes_update_arm(state, crtc);
 
 	commit_pipe_post_planes(state, crtc);
 
@@ -8305,7 +8059,7 @@ static void intel_commit_modeset_disables(struct intel_atomic_state *state)
 		 */
 		if (!is_trans_port_sync_slave(old_crtc_state) &&
 		    !intel_dp_mst_is_slave_trans(old_crtc_state) &&
-		    !old_crtc_state->bigjoiner_slave)
+		    !intel_crtc_is_bigjoiner_slave(old_crtc_state))
 			continue;
 
 		intel_old_crtc_state_disables(state, old_crtc_state,
@@ -8420,7 +8174,7 @@ static void skl_commit_modeset_enables(struct intel_atomic_state *state)
 
 		if (intel_dp_mst_is_slave_trans(new_crtc_state) ||
 		    is_trans_port_sync_master(new_crtc_state) ||
-		    (new_crtc_state->bigjoiner && !new_crtc_state->bigjoiner_slave))
+		    intel_crtc_is_bigjoiner_master(new_crtc_state))
 			continue;
 
 		modeset_pipes &= ~BIT(pipe);
@@ -8947,10 +8701,8 @@ static u32 intel_encoder_possible_crtcs(struct intel_encoder *encoder)
 	struct intel_crtc *crtc;
 	u32 possible_crtcs = 0;
 
-	for_each_intel_crtc(dev, crtc) {
-		if (encoder->pipe_mask & BIT(crtc->pipe))
-			possible_crtcs |= drm_crtc_mask(&crtc->base);
-	}
+	for_each_intel_crtc_in_pipe_mask(dev, crtc, encoder->pipe_mask)
+		possible_crtcs |= drm_crtc_mask(&crtc->base);
 
 	return possible_crtcs;
 }
@@ -9006,6 +8758,7 @@ static void intel_setup_outputs(struct drm_i915_private *dev_priv)
 		intel_ddi_init(dev_priv, PORT_B);
 		intel_ddi_init(dev_priv, PORT_C);
 		intel_ddi_init(dev_priv, PORT_D_XELPD);
+		intel_ddi_init(dev_priv, PORT_TC1);
 	} else if (IS_ALDERLAKE_P(dev_priv)) {
 		intel_ddi_init(dev_priv, PORT_A);
 		intel_ddi_init(dev_priv, PORT_B);
@@ -10132,7 +9885,7 @@ static void intel_sanitize_crtc(struct intel_crtc *crtc,
 	/* Adjust the state of the output pipe according to whether we
 	 * have active connectors/encoders. */
 	if (crtc_state->hw.active && !intel_crtc_has_encoders(crtc) &&
-	    !crtc_state->bigjoiner_slave)
+	    !intel_crtc_is_bigjoiner_slave(crtc_state))
 		intel_crtc_disable_noatomic(crtc, ctx);
 
 	if (crtc_state->hw.active || HAS_GMCH(dev_priv)) {
@@ -10345,12 +10098,18 @@ static void intel_modeset_readout_hw_state(struct drm_device *dev)
 
 			/* read out to the slave crtcs as well for bigjoiner */
 			if (crtc_state->bigjoiner) {
+				struct intel_crtc *slave_crtc;
+
 				/* the encoder should be linked to the bigjoiner master */
-				WARN_ON(crtc_state->bigjoiner_slave);
+				WARN_ON(intel_crtc_is_bigjoiner_slave(crtc_state));
 
-				crtc = crtc_state->bigjoiner_linked_crtc;
-				crtc_state = to_intel_crtc_state(crtc->base.state);
-				intel_encoder_get_config(encoder, crtc_state);
+				for_each_intel_crtc_in_pipe_mask(&dev_priv->drm, slave_crtc,
+								 intel_crtc_bigjoiner_slave_pipes(crtc_state)) {
+					struct intel_crtc_state *slave_crtc_state;
+
+					slave_crtc_state = to_intel_crtc_state(slave_crtc->base.state);
+					intel_encoder_get_config(encoder, slave_crtc_state);
+				}
 			}
 		} else {
 			encoder->base.crtc = NULL;
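
The readout above fans the encoder config out to every pipe in the bigjoiner mask instead of following a single linked-crtc pointer. The helper declarations appear in the intel_display.h hunk below; their bodies are outside this patch, but under the convention that the master is the lowest pipe in the mask, a plausible sketch is:

/* Sketch only: assumes the master occupies the lowest pipe in the mask. */
static enum pipe sketch_bigjoiner_master_pipe(const struct intel_crtc_state *crtc_state)
{
	return ffs(crtc_state->bigjoiner_pipes) - 1;
}

static bool sketch_is_bigjoiner_slave(const struct intel_crtc_state *crtc_state)
{
	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);

	return crtc_state->bigjoiner_pipes &&
		crtc->pipe != sketch_bigjoiner_master_pipe(crtc_state);
}
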
diff --git a/drivers/gpu/drm/i915/display/intel_display.h b/drivers/gpu/drm/i915/display/intel_display.h
index 457738aeee3e789ae4dcfe5e8652eaa34d67308f..11d6134c53c84d0d8828a5f254d8f28e9ce6392c 100644
--- a/drivers/gpu/drm/i915/display/intel_display.h
+++ b/drivers/gpu/drm/i915/display/intel_display.h
@@ -430,11 +430,11 @@ enum hpd_pin {
 			    &(dev)->mode_config.crtc_list,		\
 			    base.head)
 
-#define for_each_intel_crtc_mask(dev, intel_crtc, crtc_mask)		\
+#define for_each_intel_crtc_in_pipe_mask(dev, intel_crtc, pipe_mask)	\
 	list_for_each_entry(intel_crtc,					\
 			    &(dev)->mode_config.crtc_list,		\
 			    base.head)					\
-		for_each_if((crtc_mask) & drm_crtc_mask(&intel_crtc->base))
+		for_each_if((pipe_mask) & BIT(intel_crtc->pipe))
 
 #define for_each_intel_encoder(dev, intel_encoder)		\
 	list_for_each_entry(intel_encoder,			\
@@ -555,6 +555,10 @@ intel_mode_valid_max_plane_size(struct drm_i915_private *dev_priv,
 				bool bigjoiner);
 enum phy intel_port_to_phy(struct drm_i915_private *i915, enum port port);
 bool is_trans_port_sync_mode(const struct intel_crtc_state *state);
+bool intel_crtc_is_bigjoiner_slave(const struct intel_crtc_state *crtc_state);
+bool intel_crtc_is_bigjoiner_master(const struct intel_crtc_state *crtc_state);
+u8 intel_crtc_bigjoiner_slave_pipes(const struct intel_crtc_state *crtc_state);
+struct intel_crtc *intel_master_crtc(const struct intel_crtc_state *crtc_state);
 
 void intel_plane_destroy(struct drm_plane *plane);
 void intel_enable_transcoder(const struct intel_crtc_state *new_crtc_state);
@@ -632,9 +636,6 @@ void intel_cpu_transcoder_get_m2_n2(struct intel_crtc *crtc,
 void i9xx_crtc_clock_get(struct intel_crtc *crtc,
 			 struct intel_crtc_state *pipe_config);
 int intel_dotclock_calculate(int link_freq, const struct intel_link_m_n *m_n);
-bool hsw_crtc_state_ips_capable(const struct intel_crtc_state *crtc_state);
-void hsw_enable_ips(const struct intel_crtc_state *crtc_state);
-void hsw_disable_ips(const struct intel_crtc_state *crtc_state);
 enum intel_display_power_domain intel_port_to_power_domain(enum port port);
 enum intel_display_power_domain
 intel_aux_power_domain(struct intel_digital_port *dig_port);
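
Note the semantics change in the renamed iterator above: the mask is keyed on hardware pipe (BIT(crtc->pipe)), not on drm_crtc index as drm_crtc_mask() was. A hypothetical caller:

/* Hypothetical: visit pipes B and C only; mask bits are pipe numbers. */
u8 pipe_mask = BIT(PIPE_B) | BIT(PIPE_C);
struct intel_crtc *crtc;

for_each_intel_crtc_in_pipe_mask(&i915->drm, crtc, pipe_mask)
	drm_dbg_kms(&i915->drm, "visiting pipe %c\n", pipe_name(crtc->pipe));
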
diff --git a/drivers/gpu/drm/i915/display/intel_display_debugfs.c b/drivers/gpu/drm/i915/display/intel_display_debugfs.c
index f4de004d470f09f82a97e3634ecac475e82c1a2b..ffe6822d7414a454a10f0e8f0a66a0b51aa988ee 100644
--- a/drivers/gpu/drm/i915/display/intel_display_debugfs.c
+++ b/drivers/gpu/drm/i915/display/intel_display_debugfs.c
@@ -16,6 +16,7 @@
 #include "intel_dp_mst.h"
 #include "intel_drrs.h"
 #include "intel_fbc.h"
+#include "intel_fbdev.h"
 #include "intel_hdcp.h"
 #include "intel_hdmi.h"
 #include "intel_pm.h"
@@ -78,7 +79,7 @@ static int i915_sr_status(struct seq_file *m, void *unused)
 	if (DISPLAY_VER(dev_priv) >= 9)
 		/* no global SR status; inspect per-plane WM */;
 	else if (HAS_PCH_SPLIT(dev_priv))
-		sr_enabled = intel_de_read(dev_priv, WM1_LP_ILK) & WM1_LP_SR_EN;
+		sr_enabled = intel_de_read(dev_priv, WM1_LP_ILK) & WM_LP_ENABLE;
 	else if (IS_I965GM(dev_priv) || IS_G4X(dev_priv) ||
 		 IS_I945G(dev_priv) || IS_I945GM(dev_priv))
 		sr_enabled = intel_de_read(dev_priv, FW_BLC_SELF) & FW_BLC_SELF_EN;
@@ -124,9 +125,8 @@ static int i915_gem_framebuffer_info(struct seq_file *m, void *data)
 	struct drm_framebuffer *drm_fb;
 
 #ifdef CONFIG_DRM_FBDEV_EMULATION
-	if (dev_priv->fbdev && dev_priv->fbdev->helper.fb) {
-		fbdev_fb = to_intel_framebuffer(dev_priv->fbdev->helper.fb);
-
+	fbdev_fb = intel_fbdev_framebuffer(dev_priv->fbdev);
+	if (fbdev_fb) {
 		seq_printf(m, "fbcon size: %d x %d, depth %d, %d bpp, modifier 0x%llx, refcount %d, obj ",
 			   fbdev_fb->base.width,
 			   fbdev_fb->base.height,
@@ -474,8 +474,8 @@ static int i915_dmc_info(struct seq_file *m, void *unused)
 		 * reg for DC3CO debugging and validation,
 		 * but TGL DMC f/w is using DMC_DEBUG3 reg for DC3CO counter.
 		 */
-		seq_printf(m, "DC3CO count: %d\n",
-			   intel_de_read(dev_priv, DMC_DEBUG3));
+		seq_printf(m, "DC3CO count: %d\n", intel_de_read(dev_priv, IS_DGFX(dev_priv) ?
+					DG1_DMC_DEBUG3 : TGL_DMC_DEBUG3));
 	} else {
 		dc5_reg = IS_BROXTON(dev_priv) ? BXT_DMC_DC3_DC5_COUNT :
 						 SKL_DMC_DC3_DC5_COUNT;
@@ -923,23 +923,23 @@ static void intel_crtc_info(struct seq_file *m, struct intel_crtc *crtc)
 		   yesno(crtc_state->uapi.active),
 		   DRM_MODE_ARG(&crtc_state->uapi.mode));
 
-	if (crtc_state->hw.enable) {
-		seq_printf(m, "\thw: active=%s, adjusted_mode=" DRM_MODE_FMT "\n",
-			   yesno(crtc_state->hw.active),
-			   DRM_MODE_ARG(&crtc_state->hw.adjusted_mode));
+	seq_printf(m, "\thw: enable=%s, active=%s\n",
+		   yesno(crtc_state->hw.enable), yesno(crtc_state->hw.active));
+	seq_printf(m, "\tadjusted_mode=" DRM_MODE_FMT "\n",
+		   DRM_MODE_ARG(&crtc_state->hw.adjusted_mode));
+	seq_printf(m, "\tpipe__mode=" DRM_MODE_FMT "\n",
+		   DRM_MODE_ARG(&crtc_state->hw.pipe_mode));
 
-		seq_printf(m, "\tpipe src size=%dx%d, dither=%s, bpp=%d\n",
-			   crtc_state->pipe_src_w, crtc_state->pipe_src_h,
-			   yesno(crtc_state->dither), crtc_state->pipe_bpp);
+	seq_printf(m, "\tpipe src size=%dx%d, dither=%s, bpp=%d\n",
+		   crtc_state->pipe_src_w, crtc_state->pipe_src_h,
+		   yesno(crtc_state->dither), crtc_state->pipe_bpp);
 
-		intel_scaler_info(m, crtc);
-	}
+	intel_scaler_info(m, crtc);
 
 	if (crtc_state->bigjoiner)
-		seq_printf(m, "\tLinked to [CRTC:%d:%s] as a %s\n",
-			   crtc_state->bigjoiner_linked_crtc->base.base.id,
-			   crtc_state->bigjoiner_linked_crtc->base.name,
-			   crtc_state->bigjoiner_slave ? "slave" : "master");
+		seq_printf(m, "\tLinked to 0x%x pipes as a %s\n",
+			   crtc_state->bigjoiner_pipes,
+			   intel_crtc_is_bigjoiner_slave(crtc_state) ? "slave" : "master");
 
 	for_each_intel_encoder_mask(&dev_priv->drm, encoder,
 				    crtc_state->uapi.encoder_mask)
@@ -1015,6 +1015,7 @@ static int i915_shared_dplls_info(struct seq_file *m, void *unused)
 		seq_printf(m, " wrpll:   0x%08x\n", pll->state.hw_state.wrpll);
 		seq_printf(m, " cfgcr0:  0x%08x\n", pll->state.hw_state.cfgcr0);
 		seq_printf(m, " cfgcr1:  0x%08x\n", pll->state.hw_state.cfgcr1);
+		seq_printf(m, " div0:    0x%08x\n", pll->state.hw_state.div0);
 		seq_printf(m, " mg_refclkin_ctl:        0x%08x\n",
 			   pll->state.hw_state.mg_refclkin_ctl);
 		seq_printf(m, " mg_clktop2_coreclkctl1: 0x%08x\n",
diff --git a/drivers/gpu/drm/i915/display/intel_display_power.c b/drivers/gpu/drm/i915/display/intel_display_power.c
index d2102cc17bb4c2919cd68fb807fdd929d5af922b..9ebae7ac32356e925830bc06fdf06684b49ed324 100644
--- a/drivers/gpu/drm/i915/display/intel_display_power.c
+++ b/drivers/gpu/drm/i915/display/intel_display_power.c
@@ -16,6 +16,7 @@
 #include "intel_dpio_phy.h"
 #include "intel_dpll.h"
 #include "intel_hotplug.h"
+#include "intel_mchbar_regs.h"
 #include "intel_pch_refclk.h"
 #include "intel_pcode.h"
 #include "intel_pm.h"
diff --git a/drivers/gpu/drm/i915/display/intel_display_types.h b/drivers/gpu/drm/i915/display/intel_display_types.h
index 60e15226a8cb37e6456ad010d7bed1a4a43aca2d..b50d0e6efe211bd3a309cae315e7a4908565e792 100644
--- a/drivers/gpu/drm/i915/display/intel_display_types.h
+++ b/drivers/gpu/drm/i915/display/intel_display_types.h
@@ -26,7 +26,6 @@
 #ifndef __INTEL_DISPLAY_TYPES_H__
 #define __INTEL_DISPLAY_TYPES_H__
 
-#include <linux/async.h>
 #include <linux/i2c.h>
 #include <linux/pm_qos.h>
 #include <linux/pwm.h>
@@ -38,7 +37,6 @@
 #include <drm/drm_crtc.h>
 #include <drm/drm_dsc.h>
 #include <drm/drm_encoder.h>
-#include <drm/drm_fb_helper.h>
 #include <drm/drm_fourcc.h>
 #include <drm/drm_probe_helper.h>
 #include <drm/drm_rect.h>
@@ -145,25 +143,6 @@ struct intel_framebuffer {
 	struct i915_address_space *dpt_vm;
 };
 
-struct intel_fbdev {
-	struct drm_fb_helper helper;
-	struct intel_framebuffer *fb;
-	struct i915_vma *vma;
-	unsigned long vma_flags;
-	async_cookie_t cookie;
-	int preferred_bpp;
-
-	/* Whether or not fbdev hpd processing is temporarily suspended */
-	bool hpd_suspended : 1;
-	/* Set when a hotplug was received while HPD processing was
-	 * suspended
-	 */
-	bool hpd_waiting : 1;
-
-	/* Protects hpd_suspended */
-	struct mutex hpd_lock;
-};
-
 enum intel_hotplug_state {
 	INTEL_HOTPLUG_UNCHANGED,
 	INTEL_HOTPLUG_CHANGED,
@@ -1168,6 +1147,7 @@ struct intel_crtc_state {
 
 	/* bitmask of actually visible planes (enum plane_id) */
 	u8 active_planes;
+	u8 scaled_planes;
 	u8 nv12_planes;
 	u8 c8_planes;
 
@@ -1202,11 +1182,8 @@ struct intel_crtc_state {
 	/* enable pipe big joiner? */
 	bool bigjoiner;
 
-	/* big joiner slave crtc? */
-	bool bigjoiner_slave;
-
-	/* linked crtc for bigjoiner, either slave or master */
-	struct intel_crtc *bigjoiner_linked_crtc;
+	/* big joiner pipe bitmask */
+	u8 bigjoiner_pipes;
 
 	/* Display Stream compression state */
 	struct {
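
Replacing the bigjoiner_slave flag and linked-crtc pointer with one u8 mask also makes slave enumeration a bit-trick: clearing the lowest set bit strips the master (same lowest-pipe-is-master assumption as the sketch earlier):

/* Sketch: pipes A+B joined; both CRTC states carry the same mask. */
u8 bigjoiner_pipes = BIT(PIPE_A) | BIT(PIPE_B);

bool joined = bigjoiner_pipes != 0;
u8 slave_pipes = bigjoiner_pipes & (bigjoiner_pipes - 1); /* drop master bit */
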
diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c
index 146b83916005525b3cb35e3bf0b3bb5597cabbe8..1046e7fe310a9e61c3edd5c1f003823dee95e447 100644
--- a/drivers/gpu/drm/i915/display/intel_dp.c
+++ b/drivers/gpu/drm/i915/display/intel_dp.c
@@ -886,9 +886,8 @@ intel_dp_mode_valid_downstream(struct intel_connector *connector,
 		return MODE_CLOCK_HIGH;
 
 	/* Assume 8bpc for the DP++/HDMI/DVI TMDS clock check */
-	tmds_clock = target_clock;
-	if (drm_mode_is_420_only(info, mode))
-		tmds_clock /= 2;
+	tmds_clock = intel_hdmi_tmds_clock(target_clock, 8,
+					   drm_mode_is_420_only(info, mode));
 
 	if (intel_dp->dfp.min_tmds_clock &&
 	    tmds_clock < intel_dp->dfp.min_tmds_clock)
@@ -1139,21 +1138,12 @@ static bool intel_dp_hdmi_ycbcr420(struct intel_dp *intel_dp,
 		 intel_dp->dfp.ycbcr_444_to_420);
 }
 
-static int intel_dp_hdmi_tmds_clock(struct intel_dp *intel_dp,
-				    const struct intel_crtc_state *crtc_state, int bpc)
-{
-	int clock = crtc_state->hw.adjusted_mode.crtc_clock * bpc / 8;
-
-	if (intel_dp_hdmi_ycbcr420(intel_dp, crtc_state))
-		clock /= 2;
-
-	return clock;
-}
-
 static bool intel_dp_hdmi_tmds_clock_valid(struct intel_dp *intel_dp,
 					   const struct intel_crtc_state *crtc_state, int bpc)
 {
-	int tmds_clock = intel_dp_hdmi_tmds_clock(intel_dp, crtc_state, bpc);
+	int clock = crtc_state->hw.adjusted_mode.crtc_clock;
+	int tmds_clock = intel_hdmi_tmds_clock(clock, bpc,
+					       intel_dp_hdmi_ycbcr420(intel_dp, crtc_state));
 
 	if (intel_dp->dfp.min_tmds_clock &&
 	    tmds_clock < intel_dp->dfp.min_tmds_clock)
@@ -3628,6 +3618,32 @@ static void intel_dp_handle_test_request(struct intel_dp *intel_dp)
 			    "Could not write test response to sink\n");
 }
 
+static bool intel_dp_link_ok(struct intel_dp *intel_dp,
+			     u8 link_status[DP_LINK_STATUS_SIZE])
+{
+	struct intel_encoder *encoder = &dp_to_dig_port(intel_dp)->base;
+	struct drm_i915_private *i915 = to_i915(encoder->base.dev);
+	bool uhbr = intel_dp->link_rate >= 1000000;
+	bool ok;
+
+	if (uhbr)
+		ok = drm_dp_128b132b_lane_channel_eq_done(link_status,
+							  intel_dp->lane_count);
+	else
+		ok = drm_dp_channel_eq_ok(link_status, intel_dp->lane_count);
+
+	if (ok)
+		return true;
+
+	intel_dp_dump_link_status(intel_dp, DP_PHY_DPRX, link_status);
+	drm_dbg_kms(&i915->drm,
+		    "[ENCODER:%d:%s] %s link not ok, retraining\n",
+		    encoder->base.base.id, encoder->base.name,
+		    uhbr ? "128b/132b" : "8b/10b");
+
+	return false;
+}
+
 static void
 intel_dp_mst_hpd_irq(struct intel_dp *intel_dp, u8 *esi, u8 *ack)
 {
@@ -3658,14 +3674,7 @@ static bool intel_dp_mst_link_status(struct intel_dp *intel_dp)
 		return false;
 	}
 
-	if (!drm_dp_channel_eq_ok(link_status, intel_dp->lane_count)) {
-		drm_dbg_kms(&i915->drm,
-			    "[ENCODER:%d:%s] channel EQ not ok, retraining\n",
-			    encoder->base.base.id, encoder->base.name);
-		return false;
-	}
-
-	return true;
+	return intel_dp_link_ok(intel_dp, link_status);
 }
 
 /**
@@ -3779,8 +3788,8 @@ intel_dp_needs_link_retrain(struct intel_dp *intel_dp)
 					intel_dp->lane_count))
 		return false;
 
-	/* Retrain if Channel EQ or CR not ok */
-	return !drm_dp_channel_eq_ok(link_status, intel_dp->lane_count);
+	/* Retrain if link not ok */
+	return !intel_dp_link_ok(intel_dp, link_status);
 }
 
 static bool intel_dp_has_connector(struct intel_dp *intel_dp,
@@ -3810,14 +3819,14 @@ static bool intel_dp_has_connector(struct intel_dp *intel_dp,
 
 static int intel_dp_prep_link_retrain(struct intel_dp *intel_dp,
 				      struct drm_modeset_acquire_ctx *ctx,
-				      u32 *crtc_mask)
+				      u8 *pipe_mask)
 {
 	struct drm_i915_private *i915 = dp_to_i915(intel_dp);
 	struct drm_connector_list_iter conn_iter;
 	struct intel_connector *connector;
 	int ret = 0;
 
-	*crtc_mask = 0;
+	*pipe_mask = 0;
 
 	if (!intel_dp_needs_link_retrain(intel_dp))
 		return 0;
@@ -3851,12 +3860,12 @@ static int intel_dp_prep_link_retrain(struct intel_dp *intel_dp,
 		    !try_wait_for_completion(&conn_state->commit->hw_done))
 			continue;
 
-		*crtc_mask |= drm_crtc_mask(&crtc->base);
+		*pipe_mask |= BIT(crtc->pipe);
 	}
 	drm_connector_list_iter_end(&conn_iter);
 
 	if (!intel_dp_needs_link_retrain(intel_dp))
-		*crtc_mask = 0;
+		*pipe_mask = 0;
 
 	return ret;
 }
@@ -3875,7 +3884,7 @@ int intel_dp_retrain_link(struct intel_encoder *encoder,
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
 	struct intel_dp *intel_dp = enc_to_intel_dp(encoder);
 	struct intel_crtc *crtc;
-	u32 crtc_mask;
+	u8 pipe_mask;
 	int ret;
 
 	if (!intel_dp_is_connected(intel_dp))
@@ -3886,17 +3895,17 @@ int intel_dp_retrain_link(struct intel_encoder *encoder,
 	if (ret)
 		return ret;
 
-	ret = intel_dp_prep_link_retrain(intel_dp, ctx, &crtc_mask);
+	ret = intel_dp_prep_link_retrain(intel_dp, ctx, &pipe_mask);
 	if (ret)
 		return ret;
 
-	if (crtc_mask == 0)
+	if (pipe_mask == 0)
 		return 0;
 
 	drm_dbg_kms(&dev_priv->drm, "[ENCODER:%d:%s] retraining link\n",
 		    encoder->base.base.id, encoder->base.name);
 
-	for_each_intel_crtc_mask(&dev_priv->drm, crtc, crtc_mask) {
+	for_each_intel_crtc_in_pipe_mask(&dev_priv->drm, crtc, pipe_mask) {
 		const struct intel_crtc_state *crtc_state =
 			to_intel_crtc_state(crtc->base.state);
 
@@ -3907,7 +3916,7 @@ int intel_dp_retrain_link(struct intel_encoder *encoder,
 							      intel_crtc_pch_transcoder(crtc), false);
 	}
 
-	for_each_intel_crtc_mask(&dev_priv->drm, crtc, crtc_mask) {
+	for_each_intel_crtc_in_pipe_mask(&dev_priv->drm, crtc, pipe_mask) {
 		const struct intel_crtc_state *crtc_state =
 			to_intel_crtc_state(crtc->base.state);
 
@@ -3924,7 +3933,7 @@ int intel_dp_retrain_link(struct intel_encoder *encoder,
 		break;
 	}
 
-	for_each_intel_crtc_mask(&dev_priv->drm, crtc, crtc_mask) {
+	for_each_intel_crtc_in_pipe_mask(&dev_priv->drm, crtc, pipe_mask) {
 		const struct intel_crtc_state *crtc_state =
 			to_intel_crtc_state(crtc->base.state);
 
@@ -3942,14 +3951,14 @@ int intel_dp_retrain_link(struct intel_encoder *encoder,
 
 static int intel_dp_prep_phy_test(struct intel_dp *intel_dp,
 				  struct drm_modeset_acquire_ctx *ctx,
-				  u32 *crtc_mask)
+				  u8 *pipe_mask)
 {
 	struct drm_i915_private *i915 = dp_to_i915(intel_dp);
 	struct drm_connector_list_iter conn_iter;
 	struct intel_connector *connector;
 	int ret = 0;
 
-	*crtc_mask = 0;
+	*pipe_mask = 0;
 
 	drm_connector_list_iter_begin(&i915->drm, &conn_iter);
 	for_each_intel_connector_iter(connector, &conn_iter) {
@@ -3980,7 +3989,7 @@ static int intel_dp_prep_phy_test(struct intel_dp *intel_dp,
 		    !try_wait_for_completion(&conn_state->commit->hw_done))
 			continue;
 
-		*crtc_mask |= drm_crtc_mask(&crtc->base);
+		*pipe_mask |= BIT(crtc->pipe);
 	}
 	drm_connector_list_iter_end(&conn_iter);
 
@@ -3993,7 +4002,7 @@ static int intel_dp_do_phy_test(struct intel_encoder *encoder,
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
 	struct intel_dp *intel_dp = enc_to_intel_dp(encoder);
 	struct intel_crtc *crtc;
-	u32 crtc_mask;
+	u8 pipe_mask;
 	int ret;
 
 	ret = drm_modeset_lock(&dev_priv->drm.mode_config.connection_mutex,
@@ -4001,17 +4010,17 @@ static int intel_dp_do_phy_test(struct intel_encoder *encoder,
 	if (ret)
 		return ret;
 
-	ret = intel_dp_prep_phy_test(intel_dp, ctx, &crtc_mask);
+	ret = intel_dp_prep_phy_test(intel_dp, ctx, &pipe_mask);
 	if (ret)
 		return ret;
 
-	if (crtc_mask == 0)
+	if (pipe_mask == 0)
 		return 0;
 
 	drm_dbg_kms(&dev_priv->drm, "[ENCODER:%d:%s] PHY test\n",
 		    encoder->base.base.id, encoder->base.name);
 
-	for_each_intel_crtc_mask(&dev_priv->drm, crtc, crtc_mask) {
+	for_each_intel_crtc_in_pipe_mask(&dev_priv->drm, crtc, pipe_mask) {
 		const struct intel_crtc_state *crtc_state =
 			to_intel_crtc_state(crtc->base.state);
 
diff --git a/drivers/gpu/drm/i915/display/intel_dp_link_training.c b/drivers/gpu/drm/i915/display/intel_dp_link_training.c
index 9451f336f28fcd022f4c0b22db73b0b59fb14876..5d98773efd1b3d847e5c309b145a3fa8b8400f6e 100644
--- a/drivers/gpu/drm/i915/display/intel_dp_link_training.c
+++ b/drivers/gpu/drm/i915/display/intel_dp_link_training.c
@@ -712,7 +712,7 @@ static bool intel_dp_adjust_request_changed(const struct intel_crtc_state *crtc_
 	return false;
 }
 
-static void
+void
 intel_dp_dump_link_status(struct intel_dp *intel_dp, enum drm_dp_phy dp_phy,
 			  const u8 link_status[DP_LINK_STATUS_SIZE])
 {
@@ -996,6 +996,23 @@ static bool intel_dp_disable_dpcd_training_pattern(struct intel_dp *intel_dp,
 	return drm_dp_dpcd_write(&intel_dp->aux, reg, &val, 1) == 1;
 }
 
+static int
+intel_dp_128b132b_intra_hop(struct intel_dp *intel_dp,
+			    const struct intel_crtc_state *crtc_state)
+{
+	struct drm_i915_private *i915 = dp_to_i915(intel_dp);
+	u8 sink_status;
+	int ret;
+
+	ret = drm_dp_dpcd_readb(&intel_dp->aux, DP_SINK_STATUS, &sink_status);
+	if (ret != 1) {
+		drm_dbg_kms(&i915->drm, "Failed to read sink status\n");
+		return ret < 0 ? ret : -EIO;
+	}
+
+	return sink_status & DP_INTRA_HOP_AUX_REPLY_INDICATION ? 1 : 0;
+}
+
 /**
  * intel_dp_stop_link_train - stop link training
  * @intel_dp: DP struct
@@ -1015,11 +1032,21 @@ static bool intel_dp_disable_dpcd_training_pattern(struct intel_dp *intel_dp,
 void intel_dp_stop_link_train(struct intel_dp *intel_dp,
 			      const struct intel_crtc_state *crtc_state)
 {
+	struct drm_i915_private *i915 = dp_to_i915(intel_dp);
+	struct intel_encoder *encoder = &dp_to_dig_port(intel_dp)->base;
+
 	intel_dp->link_trained = true;
 
 	intel_dp_disable_dpcd_training_pattern(intel_dp, DP_PHY_DPRX);
 	intel_dp_program_link_training_pattern(intel_dp, crtc_state, DP_PHY_DPRX,
 					       DP_TRAINING_PATTERN_DISABLE);
+
+	if (intel_dp_is_uhbr(crtc_state) &&
+	    wait_for(intel_dp_128b132b_intra_hop(intel_dp, crtc_state) == 0, 500)) {
+		drm_dbg_kms(&i915->drm,
+			    "[ENCODER:%d:%s] 128b/132b intra-hop not clearing\n",
+			    encoder->base.base.id, encoder->base.name);
+	}
 }
 
 static bool
@@ -1083,8 +1110,6 @@ intel_dp_link_train_all_phys(struct intel_dp *intel_dp,
 	bool ret = true;
 	int i;
 
-	intel_dp_prepare_link_train(intel_dp, crtc_state);
-
 	for (i = lttpr_count - 1; i >= 0; i--) {
 		enum drm_dp_phy dp_phy = DP_PHY_LTTPR(i);
 
@@ -1104,6 +1129,272 @@ intel_dp_link_train_all_phys(struct intel_dp *intel_dp,
 	return ret;
 }
 
+/*
+ * 128b/132b DP LANEx_EQ_DONE Sequence (DP 2.0 E11 3.5.2.16.1)
+ */
+static bool
+intel_dp_128b132b_lane_eq(struct intel_dp *intel_dp,
+			  const struct intel_crtc_state *crtc_state)
+{
+	struct intel_encoder *encoder = &dp_to_dig_port(intel_dp)->base;
+	struct drm_i915_private *i915 = to_i915(encoder->base.dev);
+	u8 link_status[DP_LINK_STATUS_SIZE];
+	int delay_us;
+	int try, max_tries = 20;
+	unsigned long deadline;
+	bool timeout = false;
+
+	/*
+	 * Reset signal levels. Start transmitting 128b/132b TPS1.
+	 *
+	 * Put DPRX and LTTPRs (if any) into intra-hop AUX mode by writing TPS1
+	 * in DP_TRAINING_PATTERN_SET.
+	 */
+	if (!intel_dp_reset_link_train(intel_dp, crtc_state, DP_PHY_DPRX,
+				       DP_TRAINING_PATTERN_1)) {
+		drm_err(&i915->drm,
+			"[ENCODER:%d:%s] Failed to start 128b/132b TPS1\n",
+			encoder->base.base.id, encoder->base.name);
+		return false;
+	}
+
+	delay_us = drm_dp_128b132b_read_aux_rd_interval(&intel_dp->aux);
+
+	/* Read the initial TX FFE settings. */
+	if (drm_dp_dpcd_read_link_status(&intel_dp->aux, link_status) < 0) {
+		drm_err(&i915->drm,
+			"[ENCODER:%d:%s] Failed to read TX FFE presets\n",
+			encoder->base.base.id, encoder->base.name);
+		return false;
+	}
+
+	/* Update signal levels and training set as requested. */
+	intel_dp_get_adjust_train(intel_dp, crtc_state, DP_PHY_DPRX, link_status);
+	if (!intel_dp_update_link_train(intel_dp, crtc_state, DP_PHY_DPRX)) {
+		drm_err(&i915->drm,
+			"[ENCODER:%d:%s] Failed to set initial TX FFE settings\n",
+			encoder->base.base.id, encoder->base.name);
+		return false;
+	}
+
+	/* Start transmitting 128b/132b TPS2. */
+	if (!intel_dp_set_link_train(intel_dp, crtc_state, DP_PHY_DPRX,
+				     DP_TRAINING_PATTERN_2)) {
+		drm_err(&i915->drm,
+			"[ENCODER:%d:%s] Failed to start 128b/132b TPS2\n",
+			encoder->base.base.id, encoder->base.name);
+		return false;
+	}
+
+	/* Time budget for the LANEx_EQ_DONE Sequence */
+	deadline = jiffies + msecs_to_jiffies_timeout(400);
+
+	for (try = 0; try < max_tries; try++) {
+		usleep_range(delay_us, 2 * delay_us);
+
+		/*
+		 * The delay may get updated; the transmitter shall re-read
+		 * it before each link status read during link training.
+		 */
+		delay_us = drm_dp_128b132b_read_aux_rd_interval(&intel_dp->aux);
+
+		if (drm_dp_dpcd_read_link_status(&intel_dp->aux, link_status) < 0) {
+			drm_err(&i915->drm,
+				"[ENCODER:%d:%s] Failed to read link status\n",
+				encoder->base.base.id, encoder->base.name);
+			return false;
+		}
+
+		if (drm_dp_128b132b_link_training_failed(link_status)) {
+			intel_dp_dump_link_status(intel_dp, DP_PHY_DPRX, link_status);
+			drm_err(&i915->drm,
+				"[ENCODER:%d:%s] Downstream link training failure\n",
+				encoder->base.base.id, encoder->base.name);
+			return false;
+		}
+
+		if (drm_dp_128b132b_lane_channel_eq_done(link_status, crtc_state->lane_count)) {
+			drm_dbg_kms(&i915->drm,
+				    "[ENCODER:%d:%s] Lane channel eq done\n",
+				    encoder->base.base.id, encoder->base.name);
+			break;
+		}
+
+		if (timeout) {
+			intel_dp_dump_link_status(intel_dp, DP_PHY_DPRX, link_status);
+			drm_err(&i915->drm,
+				"[ENCODER:%d:%s] Lane channel eq timeout\n",
+				encoder->base.base.id, encoder->base.name);
+			return false;
+		}
+
+		if (time_after(jiffies, deadline))
+			timeout = true; /* try one last time after deadline */
+
+		/* Update signal levels and training set as requested. */
+		intel_dp_get_adjust_train(intel_dp, crtc_state, DP_PHY_DPRX, link_status);
+		if (!intel_dp_update_link_train(intel_dp, crtc_state, DP_PHY_DPRX)) {
+			drm_err(&i915->drm,
+				"[ENCODER:%d:%s] Failed to update TX FFE settings\n",
+				encoder->base.base.id, encoder->base.name);
+			return false;
+		}
+	}
+
+	if (try == max_tries) {
+		intel_dp_dump_link_status(intel_dp, DP_PHY_DPRX, link_status);
+		drm_err(&i915->drm,
+			"[ENCODER:%d:%s] Max loop count reached\n",
+			encoder->base.base.id, encoder->base.name);
+		return false;
+	}
+
+	for (;;) {
+		if (time_after(jiffies, deadline))
+			timeout = true; /* try one last time after deadline */
+
+		if (drm_dp_dpcd_read_link_status(&intel_dp->aux, link_status) < 0) {
+			drm_err(&i915->drm,
+				"[ENCODER:%d:%s] Failed to read link status\n",
+				encoder->base.base.id, encoder->base.name);
+			return false;
+		}
+
+		if (drm_dp_128b132b_link_training_failed(link_status)) {
+			intel_dp_dump_link_status(intel_dp, DP_PHY_DPRX, link_status);
+			drm_err(&i915->drm,
+				"[ENCODER:%d:%s] Downstream link training failure\n",
+				encoder->base.base.id, encoder->base.name);
+			return false;
+		}
+
+		if (drm_dp_128b132b_eq_interlane_align_done(link_status)) {
+			drm_dbg_kms(&i915->drm,
+				    "[ENCODER:%d:%s] Interlane align done\n",
+				    encoder->base.base.id, encoder->base.name);
+			break;
+		}
+
+		if (timeout) {
+			intel_dp_dump_link_status(intel_dp, DP_PHY_DPRX, link_status);
+			drm_err(&i915->drm,
+				"[ENCODER:%d:%s] Interlane align timeout\n",
+				encoder->base.base.id, encoder->base.name);
+			return false;
+		}
+
+		usleep_range(2000, 3000);
+	}
+
+	return true;
+}
+
+/*
+ * 128b/132b DP LANEx_CDS_DONE Sequence (DP 2.0 E11 3.5.2.16.2)
+ */
+static bool
+intel_dp_128b132b_lane_cds(struct intel_dp *intel_dp,
+			   const struct intel_crtc_state *crtc_state,
+			   int lttpr_count)
+{
+	struct intel_encoder *encoder = &dp_to_dig_port(intel_dp)->base;
+	struct drm_i915_private *i915 = to_i915(encoder->base.dev);
+	u8 link_status[DP_LINK_STATUS_SIZE];
+	unsigned long deadline;
+
+	if (drm_dp_dpcd_writeb(&intel_dp->aux, DP_TRAINING_PATTERN_SET,
+			       DP_TRAINING_PATTERN_2_CDS) != 1) {
+		drm_err(&i915->drm,
+			"[ENCODER:%d:%s] Failed to start 128b/132b TPS2 CDS\n",
+			encoder->base.base.id, encoder->base.name);
+		return false;
+	}
+
+	/* Time budget for the LANEx_CDS_DONE Sequence */
+	deadline = jiffies + msecs_to_jiffies_timeout((lttpr_count + 1) * 20);
+
+	for (;;) {
+		bool timeout = false;
+
+		if (time_after(jiffies, deadline))
+			timeout = true; /* try one last time after deadline */
+
+		usleep_range(2000, 3000);
+
+		if (drm_dp_dpcd_read_link_status(&intel_dp->aux, link_status) < 0) {
+			drm_err(&i915->drm,
+				"[ENCODER:%d:%s] Failed to read link status\n",
+				encoder->base.base.id, encoder->base.name);
+			return false;
+		}
+
+		if (drm_dp_128b132b_eq_interlane_align_done(link_status) &&
+		    drm_dp_128b132b_cds_interlane_align_done(link_status) &&
+		    drm_dp_128b132b_lane_symbol_locked(link_status, crtc_state->lane_count)) {
+			drm_dbg_kms(&i915->drm,
+				    "[ENCODER:%d:%s] CDS interlane align done\n",
+				    encoder->base.base.id, encoder->base.name);
+			break;
+		}
+
+		if (drm_dp_128b132b_link_training_failed(link_status)) {
+			intel_dp_dump_link_status(intel_dp, DP_PHY_DPRX, link_status);
+			drm_err(&i915->drm,
+				"[ENCODER:%d:%s] Downstream link training failure\n",
+				encoder->base.base.id, encoder->base.name);
+			return false;
+		}
+
+		if (timeout) {
+			intel_dp_dump_link_status(intel_dp, DP_PHY_DPRX, link_status);
+			drm_err(&i915->drm,
+				"[ENCODER:%d:%s] CDS timeout\n",
+				encoder->base.base.id, encoder->base.name);
+			return false;
+		}
+	}
+
+	/* FIXME: Should DP_TRAINING_PATTERN_DISABLE be written first? */
+	if (intel_dp->set_idle_link_train)
+		intel_dp->set_idle_link_train(intel_dp, crtc_state);
+
+	return true;
+}
+
+/*
+ * 128b/132b link training sequence (DP 2.0 E11 SCR on link training).
+ */
+static bool
+intel_dp_128b132b_link_train(struct intel_dp *intel_dp,
+			     const struct intel_crtc_state *crtc_state,
+			     int lttpr_count)
+{
+	struct drm_i915_private *i915 = dp_to_i915(intel_dp);
+	struct intel_connector *connector = intel_dp->attached_connector;
+	struct intel_encoder *encoder = &dp_to_dig_port(intel_dp)->base;
+	bool passed = false;
+
+	if (wait_for(intel_dp_128b132b_intra_hop(intel_dp, crtc_state) == 0, 500)) {
+		drm_err(&i915->drm,
+			"[ENCODER:%d:%s] 128b/132b intra-hop not clear\n",
+			encoder->base.base.id, encoder->base.name);
+		return false;
+	}
+
+	if (intel_dp_128b132b_lane_eq(intel_dp, crtc_state) &&
+	    intel_dp_128b132b_lane_cds(intel_dp, crtc_state, lttpr_count))
+		passed = true;
+
+	drm_dbg_kms(&i915->drm,
+		    "[CONNECTOR:%d:%s][ENCODER:%d:%s] 128b/132b Link Training %s at link rate = %d, lane count = %d\n",
+		    connector->base.base.id, connector->base.name,
+		    encoder->base.base.id, encoder->base.name,
+		    passed ? "passed" : "failed",
+		    crtc_state->port_clock, crtc_state->lane_count);
+
+	return passed;
+}
+
 /**
  * intel_dp_start_link_train - start link training
  * @intel_dp: DP struct
@@ -1117,6 +1408,7 @@ intel_dp_link_train_all_phys(struct intel_dp *intel_dp,
 void intel_dp_start_link_train(struct intel_dp *intel_dp,
 			       const struct intel_crtc_state *crtc_state)
 {
+	bool passed;
 	/*
 	 * TODO: Reiniting LTTPRs here won't be needed once proper connector
 	 * HW state readout is added.
@@ -1127,6 +1419,13 @@ void intel_dp_start_link_train(struct intel_dp *intel_dp,
 		/* Still continue with enabling the port and link training. */
 		lttpr_count = 0;
 
-	if (!intel_dp_link_train_all_phys(intel_dp, crtc_state, lttpr_count))
+	intel_dp_prepare_link_train(intel_dp, crtc_state);
+
+	if (intel_dp_is_uhbr(crtc_state))
+		passed = intel_dp_128b132b_link_train(intel_dp, crtc_state, lttpr_count);
+	else
+		passed = intel_dp_link_train_all_phys(intel_dp, crtc_state, lttpr_count);
+
+	if (!passed)
 		intel_dp_schedule_fallback_link_training(intel_dp, crtc_state);
 }
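
Both 128b/132b training loops above share one polling idiom: the timeout flag is raised once the deadline passes, but the sink still gets exactly one more status read before the sequence fails. Stripped to its skeleton (read_status() and training_done() are hypothetical stand-ins):

unsigned long deadline = jiffies + msecs_to_jiffies_timeout(400);
bool timeout = false;

for (;;) {
	if (time_after(jiffies, deadline))
		timeout = true;		/* allow one final poll */

	if (read_status() < 0)		/* hypothetical helper */
		return false;
	if (training_done())		/* hypothetical helper */
		break;
	if (timeout)
		return false;		/* the final poll failed too */

	usleep_range(2000, 3000);
}
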
diff --git a/drivers/gpu/drm/i915/display/intel_dp_link_training.h b/drivers/gpu/drm/i915/display/intel_dp_link_training.h
index dbfb15705aaaac8a6e168b2eab03ac57c2f348c8..dc1556b46b851936be27276f9e13a8854907b258 100644
--- a/drivers/gpu/drm/i915/display/intel_dp_link_training.h
+++ b/drivers/gpu/drm/i915/display/intel_dp_link_training.h
@@ -29,6 +29,10 @@ void intel_dp_start_link_train(struct intel_dp *intel_dp,
 void intel_dp_stop_link_train(struct intel_dp *intel_dp,
 			      const struct intel_crtc_state *crtc_state);
 
+void
+intel_dp_dump_link_status(struct intel_dp *intel_dp, enum drm_dp_phy dp_phy,
+			  const u8 link_status[DP_LINK_STATUS_SIZE]);
+
 /* Get the TPSx symbol type of the value programmed to DP_TRAINING_PATTERN_SET */
 static inline u8 intel_dp_training_pattern_symbol(u8 pattern)
 {
diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c
index 6b6eab507d308f08fdbed1ef05d557d30a6fbdec..e30e698aa684332264f4a3f14533a2858c2f510b 100644
--- a/drivers/gpu/drm/i915/display/intel_dp_mst.c
+++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c
@@ -99,6 +99,29 @@ static int intel_dp_mst_compute_link_config(struct intel_encoder *encoder,
 	return 0;
 }
 
+static int intel_dp_mst_update_slots(struct intel_encoder *encoder,
+				     struct intel_crtc_state *crtc_state,
+				     struct drm_connector_state *conn_state)
+{
+	struct drm_i915_private *i915 = to_i915(encoder->base.dev);
+	struct intel_dp_mst_encoder *intel_mst = enc_to_mst(encoder);
+	struct intel_dp *intel_dp = &intel_mst->primary->dp;
+	struct drm_dp_mst_topology_mgr *mgr = &intel_dp->mst_mgr;
+	struct drm_dp_mst_topology_state *topology_state;
+	u8 link_coding_cap = intel_dp_is_uhbr(crtc_state) ?
+		DP_CAP_ANSI_128B132B : DP_CAP_ANSI_8B10B;
+
+	topology_state = drm_atomic_get_mst_topology_state(conn_state->state, mgr);
+	if (IS_ERR(topology_state)) {
+		drm_dbg_kms(&i915->drm, "slot update failed\n");
+		return PTR_ERR(topology_state);
+	}
+
+	drm_dp_mst_update_slots(topology_state, link_coding_cap);
+
+	return 0;
+}
+
 static int intel_dp_mst_compute_config(struct intel_encoder *encoder,
 				       struct intel_crtc_state *pipe_config,
 				       struct drm_connector_state *conn_state)
@@ -155,6 +178,10 @@ static int intel_dp_mst_compute_config(struct intel_encoder *encoder,
 	if (ret)
 		return ret;
 
+	ret = intel_dp_mst_update_slots(encoder, pipe_config, conn_state);
+	if (ret)
+		return ret;
+
 	pipe_config->limited_color_range =
 		intel_dp_limited_color_range(pipe_config, conn_state);
 
@@ -357,6 +384,7 @@ static void intel_mst_disable_dp(struct intel_atomic_state *state,
 	struct intel_connector *connector =
 		to_intel_connector(old_conn_state->connector);
 	struct drm_i915_private *i915 = to_i915(connector->base.dev);
+	int start_slot = intel_dp_is_uhbr(old_crtc_state) ? 0 : 1;
 	int ret;
 
 	drm_dbg_kms(&i915->drm, "active links %d\n",
@@ -366,7 +394,7 @@ static void intel_mst_disable_dp(struct intel_atomic_state *state,
 
 	drm_dp_mst_reset_vcpi_slots(&intel_dp->mst_mgr, connector->port);
 
-	ret = drm_dp_update_payload_part1(&intel_dp->mst_mgr, 1);
+	ret = drm_dp_update_payload_part1(&intel_dp->mst_mgr, start_slot);
 	if (ret) {
 		drm_dbg_kms(&i915->drm, "failed to update payload %d\n", ret);
 	}
@@ -475,6 +503,7 @@ static void intel_mst_pre_enable_dp(struct intel_atomic_state *state,
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
 	struct intel_connector *connector =
 		to_intel_connector(conn_state->connector);
+	int start_slot = intel_dp_is_uhbr(pipe_config) ? 0 : 1;
 	int ret;
 	bool first_mst_stream;
 
@@ -509,7 +538,7 @@ static void intel_mst_pre_enable_dp(struct intel_atomic_state *state,
 
 	intel_dp->active_mst_links++;
 
-	ret = drm_dp_update_payload_part1(&intel_dp->mst_mgr, 1);
+	ret = drm_dp_update_payload_part1(&intel_dp->mst_mgr, start_slot);
 
 	/*
 	 * Before Gen 12 this is not done as part of
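
The start_slot changes encode a DP 2.0 detail: with 128b/132b channel coding all 64 time slots carry payload, whereas in 8b/10b slot 0 holds the MTP header, so payload allocation starts at slot 1. As a sketch:

/* Sketch: first usable VC payload time slot depends on channel coding. */
static int sketch_mst_start_slot(bool uhbr_128b132b)
{
	return uhbr_128b132b ? 0 : 1;
}
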
diff --git a/drivers/gpu/drm/i915/display/intel_dpll.c b/drivers/gpu/drm/i915/display/intel_dpll.c
index 1ce0c171f4fb32f90e907680a85630d2bb57c429..14f5ffe27d05784b39aa599ef3ee23e67bbac60e 100644
--- a/drivers/gpu/drm/i915/display/intel_dpll.c
+++ b/drivers/gpu/drm/i915/display/intel_dpll.c
@@ -16,6 +16,10 @@
 #include "intel_snps_phy.h"
 #include "vlv_sideband.h"
 
+struct intel_dpll_funcs {
+	int (*crtc_compute_clock)(struct intel_crtc_state *crtc_state);
+};
+
 struct intel_limit {
 	struct {
 		int min, max;
@@ -1400,6 +1404,14 @@ static const struct intel_dpll_funcs i8xx_dpll_funcs = {
 	.crtc_compute_clock = i8xx_crtc_compute_clock,
 };
 
+int intel_dpll_crtc_compute_clock(struct intel_crtc_state *crtc_state)
+{
+	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
+	struct drm_i915_private *i915 = to_i915(crtc->base.dev);
+
+	return i915->dpll_funcs->crtc_compute_clock(crtc_state);
+}
+
 void
 intel_dpll_init_clock_hook(struct drm_i915_private *dev_priv)
 {
diff --git a/drivers/gpu/drm/i915/display/intel_dpll.h b/drivers/gpu/drm/i915/display/intel_dpll.h
index 1af0ac43cca45e31c9d6adb351e3a99c3db0bb72..69b06a9e473e2fea42e145e810ef80495c38edb7 100644
--- a/drivers/gpu/drm/i915/display/intel_dpll.h
+++ b/drivers/gpu/drm/i915/display/intel_dpll.h
@@ -15,6 +15,7 @@ struct intel_crtc_state;
 enum pipe;
 
 void intel_dpll_init_clock_hook(struct drm_i915_private *dev_priv);
+int intel_dpll_crtc_compute_clock(struct intel_crtc_state *crtc_state);
 int vlv_calc_dpll_params(int refclk, struct dpll *clock);
 int pnv_calc_dpll_params(int refclk, struct dpll *clock);
 int i9xx_calc_dpll_params(int refclk, struct dpll *clock);
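
Moving struct intel_dpll_funcs into intel_dpll.c makes the vtable private; external callers now go through intel_dpll_crtc_compute_clock(), the one-line dispatcher added above. A hypothetical call site:

/* Hypothetical caller during atomic check. */
int ret = intel_dpll_crtc_compute_clock(crtc_state);

if (ret)
	return ret;	/* the platform hook rejected the configuration */
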
diff --git a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c
index 6723c3de5a80ce0bf4e7d8352cec48714635ef19..569903d47aea57076cea3c620128352e873f7b60 100644
--- a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c
+++ b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c
@@ -2748,6 +2748,9 @@ static void icl_calc_dpll_state(struct drm_i915_private *i915,
 		pll_state->cfgcr1 |= TGL_DPLL_CFGCR1_CFSELOVRD_NORMAL_XTAL;
 	else
 		pll_state->cfgcr1 |= DPLL_CFGCR1_CENTRAL_FREQ_8400;
+
+	if (i915->vbt.override_afc_startup)
+		pll_state->div0 = TGL_DPLL0_DIV0_AFC_STARTUP(i915->vbt.override_afc_startup_val);
 }
 
 static bool icl_mg_pll_find_divisors(int clock_khz, bool is_dp, bool use_ssc,
@@ -2949,6 +2952,11 @@ static bool icl_calc_mg_pll_state(struct intel_crtc_state *crtc_state,
 					 DKL_PLL_DIV0_PROP_COEFF(prop_coeff) |
 					 DKL_PLL_DIV0_FBPREDIV(m1div) |
 					 DKL_PLL_DIV0_FBDIV_INT(m2div_int);
+		if (dev_priv->vbt.override_afc_startup) {
+			u8 val = dev_priv->vbt.override_afc_startup_val;
+
+			pll_state->mg_pll_div0 |= DKL_PLL_DIV0_AFC_STARTUP(val);
+		}
 
 		pll_state->mg_pll_div1 = DKL_PLL_DIV1_IREF_TRIM(iref_trim) |
 					 DKL_PLL_DIV1_TDC_TARGET_CNT(tdc_targetcnt);
@@ -3448,10 +3456,10 @@ static bool dkl_pll_get_hw_state(struct drm_i915_private *dev_priv,
 		MG_CLKTOP2_CORECLKCTL1_A_DIVRATIO_MASK;
 
 	hw_state->mg_pll_div0 = intel_de_read(dev_priv, DKL_PLL_DIV0(tc_port));
-	hw_state->mg_pll_div0 &= (DKL_PLL_DIV0_INTEG_COEFF_MASK |
-				  DKL_PLL_DIV0_PROP_COEFF_MASK |
-				  DKL_PLL_DIV0_FBPREDIV_MASK |
-				  DKL_PLL_DIV0_FBDIV_INT_MASK);
+	val = DKL_PLL_DIV0_MASK;
+	if (dev_priv->vbt.override_afc_startup)
+		val |= DKL_PLL_DIV0_AFC_STARTUP_MASK;
+	hw_state->mg_pll_div0 &= val;
 
 	hw_state->mg_pll_div1 = intel_de_read(dev_priv, DKL_PLL_DIV1(tc_port));
 	hw_state->mg_pll_div1 &= (DKL_PLL_DIV1_IREF_TRIM_MASK |
@@ -3513,6 +3521,10 @@ static bool icl_pll_get_hw_state(struct drm_i915_private *dev_priv,
 						 TGL_DPLL_CFGCR0(id));
 		hw_state->cfgcr1 = intel_de_read(dev_priv,
 						 TGL_DPLL_CFGCR1(id));
+		if (dev_priv->vbt.override_afc_startup) {
+			hw_state->div0 = intel_de_read(dev_priv, TGL_DPLL0_DIV0(id));
+			hw_state->div0 &= TGL_DPLL0_DIV0_AFC_STARTUP_MASK;
+		}
 	} else {
 		if (IS_JSL_EHL(dev_priv) && id == DPLL_ID_EHL_DPLL4) {
 			hw_state->cfgcr0 = intel_de_read(dev_priv,
@@ -3554,7 +3566,7 @@ static void icl_dpll_write(struct drm_i915_private *dev_priv,
 {
 	struct intel_dpll_hw_state *hw_state = &pll->state.hw_state;
 	const enum intel_dpll_id id = pll->info->id;
-	i915_reg_t cfgcr0_reg, cfgcr1_reg;
+	i915_reg_t cfgcr0_reg, cfgcr1_reg, div0_reg = INVALID_MMIO_REG;
 
 	if (IS_ALDERLAKE_S(dev_priv)) {
 		cfgcr0_reg = ADLS_DPLL_CFGCR0(id);
@@ -3568,6 +3580,7 @@ static void icl_dpll_write(struct drm_i915_private *dev_priv,
 	} else if (DISPLAY_VER(dev_priv) >= 12) {
 		cfgcr0_reg = TGL_DPLL_CFGCR0(id);
 		cfgcr1_reg = TGL_DPLL_CFGCR1(id);
+		div0_reg = TGL_DPLL0_DIV0(id);
 	} else {
 		if (IS_JSL_EHL(dev_priv) && id == DPLL_ID_EHL_DPLL4) {
 			cfgcr0_reg = ICL_DPLL_CFGCR0(4);
@@ -3580,6 +3593,12 @@ static void icl_dpll_write(struct drm_i915_private *dev_priv,
 
 	intel_de_write(dev_priv, cfgcr0_reg, hw_state->cfgcr0);
 	intel_de_write(dev_priv, cfgcr1_reg, hw_state->cfgcr1);
+	drm_WARN_ON_ONCE(&dev_priv->drm, dev_priv->vbt.override_afc_startup &&
+			 !i915_mmio_reg_valid(div0_reg));
+	if (dev_priv->vbt.override_afc_startup &&
+	    i915_mmio_reg_valid(div0_reg))
+		intel_de_rmw(dev_priv, div0_reg, TGL_DPLL0_DIV0_AFC_STARTUP_MASK,
+			     hw_state->div0);
 	intel_de_posting_read(dev_priv, cfgcr1_reg);
 }
 
@@ -3667,13 +3686,11 @@ static void dkl_pll_write(struct drm_i915_private *dev_priv,
 	val |= hw_state->mg_clktop2_hsclkctl;
 	intel_de_write(dev_priv, DKL_CLKTOP2_HSCLKCTL(tc_port), val);
 
-	val = intel_de_read(dev_priv, DKL_PLL_DIV0(tc_port));
-	val &= ~(DKL_PLL_DIV0_INTEG_COEFF_MASK |
-		 DKL_PLL_DIV0_PROP_COEFF_MASK |
-		 DKL_PLL_DIV0_FBPREDIV_MASK |
-		 DKL_PLL_DIV0_FBDIV_INT_MASK);
-	val |= hw_state->mg_pll_div0;
-	intel_de_write(dev_priv, DKL_PLL_DIV0(tc_port), val);
+	val = DKL_PLL_DIV0_MASK;
+	if (dev_priv->vbt.override_afc_startup)
+		val |= DKL_PLL_DIV0_AFC_STARTUP_MASK;
+	intel_de_rmw(dev_priv, DKL_PLL_DIV0(tc_port), val,
+		     hw_state->mg_pll_div0);
 
 	val = intel_de_read(dev_priv, DKL_PLL_DIV1(tc_port));
 	val &= ~(DKL_PLL_DIV1_IREF_TRIM_MASK |
@@ -3912,13 +3929,14 @@ static void icl_dump_hw_state(struct drm_i915_private *dev_priv,
 			      const struct intel_dpll_hw_state *hw_state)
 {
 	drm_dbg_kms(&dev_priv->drm,
-		    "dpll_hw_state: cfgcr0: 0x%x, cfgcr1: 0x%x, "
+		    "dpll_hw_state: cfgcr0: 0x%x, cfgcr1: 0x%x, div0: 0x%x, "
 		    "mg_refclkin_ctl: 0x%x, hg_clktop2_coreclkctl1: 0x%x, "
 		    "mg_clktop2_hsclkctl: 0x%x, mg_pll_div0: 0x%x, "
 		    "mg_pll_div2: 0x%x, mg_pll_lf: 0x%x, "
 		    "mg_pll_frac_lock: 0x%x, mg_pll_ssc: 0x%x, "
 		    "mg_pll_bias: 0x%x, mg_pll_tdc_coldst_bias: 0x%x\n",
 		    hw_state->cfgcr0, hw_state->cfgcr1,
+		    hw_state->div0,
 		    hw_state->mg_refclkin_ctl,
 		    hw_state->mg_clktop2_coreclkctl1,
 		    hw_state->mg_clktop2_hsclkctl,
diff --git a/drivers/gpu/drm/i915/display/intel_dpll_mgr.h b/drivers/gpu/drm/i915/display/intel_dpll_mgr.h
index 91fe181462b2e10bd2260b24cef8394025ad65b6..ba2fdfce157920b0f1ac9d4014a3351312618d1d 100644
--- a/drivers/gpu/drm/i915/display/intel_dpll_mgr.h
+++ b/drivers/gpu/drm/i915/display/intel_dpll_mgr.h
@@ -208,6 +208,9 @@ struct intel_dpll_hw_state {
 	/* icl */
 	u32 cfgcr0;
 
+	/* tgl */
+	u32 div0;
+
 	/* bxt */
 	u32 ebb0, ebb4, pll0, pll1, pll2, pll3, pll6, pll8, pll9, pll10, pcsdw12;
 
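
A pattern worth noting in the DIV0 hunks above: the read-modify-write mask is assembled conditionally, so the AFC startup field is only claimed when the VBT overrides it and otherwise keeps whatever firmware programmed. Sketched with the Dekel flavour:

/* Sketch: intel_de_rmw(i915, reg, clear, set) clears 'clear', then ORs 'set'. */
u32 clear = DKL_PLL_DIV0_MASK;			/* fields the driver always owns */

if (i915->vbt.override_afc_startup)		/* VBT opt-in only */
	clear |= DKL_PLL_DIV0_AFC_STARTUP_MASK;

intel_de_rmw(i915, DKL_PLL_DIV0(tc_port), clear, hw_state->mg_pll_div0);
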
diff --git a/drivers/gpu/drm/i915/display/intel_dpt.c b/drivers/gpu/drm/i915/display/intel_dpt.c
index 8f674745e7e0a0f8a7329963ccc0f89e25bf3a0e..05dd7dba3a5ca376c0506199184319b1ada29b31 100644
--- a/drivers/gpu/drm/i915/display/intel_dpt.c
+++ b/drivers/gpu/drm/i915/display/intel_dpt.c
@@ -3,11 +3,13 @@
  * Copyright © 2021 Intel Corporation
  */
 
+#include "gem/i915_gem_domain.h"
+#include "gt/gen8_ppgtt.h"
+
 #include "i915_drv.h"
 #include "intel_display_types.h"
 #include "intel_dpt.h"
 #include "intel_fb.h"
-#include "gt/gen8_ppgtt.h"
 
 struct i915_dpt {
 	struct i915_address_space vm;
@@ -48,7 +50,7 @@ static void dpt_insert_page(struct i915_address_space *vm,
 }
 
 static void dpt_insert_entries(struct i915_address_space *vm,
-			       struct i915_vma *vma,
+			       struct i915_vma_resource *vma_res,
 			       enum i915_cache_level level,
 			       u32 flags)
 {
@@ -64,8 +66,8 @@ static void dpt_insert_entries(struct i915_address_space *vm,
 	 * not to allow the user to override access to a read only page.
 	 */
 
-	i = vma->node.start / I915_GTT_PAGE_SIZE;
-	for_each_sgt_daddr(addr, sgt_iter, vma->pages)
+	i = vma_res->start / I915_GTT_PAGE_SIZE;
+	for_each_sgt_daddr(addr, sgt_iter, vma_res->bi.pages)
 		gen8_set_pte(&base[i++], pte_encode | addr);
 }
 
@@ -76,35 +78,38 @@ static void dpt_clear_range(struct i915_address_space *vm,
 
 static void dpt_bind_vma(struct i915_address_space *vm,
 			 struct i915_vm_pt_stash *stash,
-			 struct i915_vma *vma,
+			 struct i915_vma_resource *vma_res,
 			 enum i915_cache_level cache_level,
 			 u32 flags)
 {
-	struct drm_i915_gem_object *obj = vma->obj;
 	u32 pte_flags;
 
+	if (vma_res->bound_flags)
+		return;
+
 	/* Applicable to VLV (gen8+ do not support RO in the GGTT) */
 	pte_flags = 0;
-	if (vma->vm->has_read_only && i915_gem_object_is_readonly(obj))
+	if (vm->has_read_only && vma_res->bi.readonly)
 		pte_flags |= PTE_READ_ONLY;
-	if (i915_gem_object_is_lmem(obj))
+	if (vma_res->bi.lmem)
 		pte_flags |= PTE_LM;
 
-	vma->vm->insert_entries(vma->vm, vma, cache_level, pte_flags);
+	vm->insert_entries(vm, vma_res, cache_level, pte_flags);
 
-	vma->page_sizes.gtt = I915_GTT_PAGE_SIZE;
+	vma_res->page_sizes_gtt = I915_GTT_PAGE_SIZE;
 
 	/*
 	 * Without aliasing PPGTT there's no difference between
 	 * GLOBAL/LOCAL_BIND; it's all the same PTEs. Hence unconditionally
 	 * mark both as bound if we bind either, to avoid double-binding.
 	 */
-	atomic_or(I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND, &vma->flags);
+	vma_res->bound_flags = I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND;
 }
 
-static void dpt_unbind_vma(struct i915_address_space *vm, struct i915_vma *vma)
+static void dpt_unbind_vma(struct i915_address_space *vm,
+			   struct i915_vma_resource *vma_res)
 {
-	vm->clear_range(vm, vma->node.start, vma->size);
+	vm->clear_range(vm, vma_res->start, vma_res->vma_size);
 }
 
 static void dpt_cleanup(struct i915_address_space *vm)
@@ -250,7 +255,11 @@ intel_dpt_create(struct intel_framebuffer *fb)
 	if (IS_ERR(dpt_obj))
 		return ERR_CAST(dpt_obj);
 
-	ret = i915_gem_object_set_cache_level(dpt_obj, I915_CACHE_NONE);
+	ret = i915_gem_object_lock_interruptible(dpt_obj, NULL);
+	if (!ret) {
+		ret = i915_gem_object_set_cache_level(dpt_obj, I915_CACHE_NONE);
+		i915_gem_object_unlock(dpt_obj);
+	}
 	if (ret) {
 		i915_gem_object_put(dpt_obj);
 		return ERR_PTR(ret);
diff --git a/drivers/gpu/drm/i915/display/intel_dsb.c b/drivers/gpu/drm/i915/display/intel_dsb.c
index 83a69a4a4fea84c6425a64e52fd915e921acac36..b34a67309976a8ba0ba9647a6351d15ade45186e 100644
--- a/drivers/gpu/drm/i915/display/intel_dsb.c
+++ b/drivers/gpu/drm/i915/display/intel_dsb.c
@@ -4,6 +4,8 @@
  *
  */
 
+#include "gem/i915_gem_internal.h"
+
 #include "i915_drv.h"
 #include "intel_de.h"
 #include "intel_display_types.h"
diff --git a/drivers/gpu/drm/i915/display/intel_dsi.h b/drivers/gpu/drm/i915/display/intel_dsi.h
index a3a906cb097eb7faa6d62017a3dd994d0b295ddd..eafef0a87fea79385d5745a8c71f3db7ebbf7746 100644
--- a/drivers/gpu/drm/i915/display/intel_dsi.h
+++ b/drivers/gpu/drm/i915/display/intel_dsi.h
@@ -79,8 +79,8 @@ struct intel_dsi {
 	 */
 	enum mipi_dsi_pixel_format pixel_format;
 
-	/* video mode format for MIPI_VIDEO_MODE_FORMAT register */
-	u32 video_mode_format;
+	/* NON_BURST_SYNC_PULSE, NON_BURST_SYNC_EVENTS, or BURST_MODE */
+	int video_mode;
 
 	/* eot for MIPI_EOT_DISABLE register */
 	u8 eotp_pkt;
diff --git a/drivers/gpu/drm/i915/display/intel_dsi_vbt.c b/drivers/gpu/drm/i915/display/intel_dsi_vbt.c
index a85574c413e831f4824e3f5e46674edd80b9e517..6b4a27372c825337ed69a26b359bb053d3bf83de 100644
--- a/drivers/gpu/drm/i915/display/intel_dsi_vbt.c
+++ b/drivers/gpu/drm/i915/display/intel_dsi_vbt.c
@@ -44,6 +44,7 @@
 #include "intel_dsi.h"
 #include "intel_dsi_vbt.h"
 #include "vlv_dsi.h"
+#include "vlv_dsi_regs.h"
 #include "vlv_sideband.h"
 
 #define MIPI_TRANSFER_MODE_SHIFT	0
@@ -675,11 +676,11 @@ void intel_dsi_log_params(struct intel_dsi *intel_dsi)
 	drm_dbg_kms(&i915->drm, "Lane count %d\n", intel_dsi->lane_count);
 	drm_dbg_kms(&i915->drm, "DPHY param reg 0x%x\n", intel_dsi->dphy_reg);
 	drm_dbg_kms(&i915->drm, "Video mode format %s\n",
-		    intel_dsi->video_mode_format == VIDEO_MODE_NON_BURST_WITH_SYNC_PULSE ?
+		    intel_dsi->video_mode == NON_BURST_SYNC_PULSE ?
 		    "non-burst with sync pulse" :
-		    intel_dsi->video_mode_format == VIDEO_MODE_NON_BURST_WITH_SYNC_EVENTS ?
+		    intel_dsi->video_mode == NON_BURST_SYNC_EVENTS ?
 		    "non-burst with sync events" :
-		    intel_dsi->video_mode_format == VIDEO_MODE_BURST ?
+		    intel_dsi->video_mode == BURST_MODE ?
 		    "burst" : "<unknown>");
 	drm_dbg_kms(&i915->drm, "Burst mode ratio %d\n",
 		    intel_dsi->burst_mode_ratio);
@@ -739,7 +740,7 @@ bool intel_dsi_vbt_init(struct intel_dsi *intel_dsi, u16 panel_id)
 	intel_dsi->dual_link = mipi_config->dual_link;
 	intel_dsi->pixel_overlap = mipi_config->pixel_overlap;
 	intel_dsi->operation_mode = mipi_config->is_cmd_mode;
-	intel_dsi->video_mode_format = mipi_config->video_transfer_mode;
+	intel_dsi->video_mode = mipi_config->video_transfer_mode;
 	intel_dsi->escape_clk_div = mipi_config->byte_clk_sel;
 	intel_dsi->lp_rx_timeout = mipi_config->lp_rx_timeout;
 	intel_dsi->hs_tx_timeout = mipi_config->hs_tx_timeout;
@@ -770,7 +771,7 @@ bool intel_dsi_vbt_init(struct intel_dsi *intel_dsi, u16 panel_id)
 	 * Target DDR frequency from VBT / non-burst DDR frequency;
 	 * multiply by 100 to preserve the remainder.
 	 */
-	if (intel_dsi->video_mode_format == VIDEO_MODE_BURST) {
+	if (intel_dsi->video_mode == BURST_MODE) {
 		if (mipi_config->target_burst_mode_freq) {
 			u32 bitrate = intel_dsi_bitrate(intel_dsi);
 
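
Storing the raw VBT video_transfer_mode works because the mode constants match the VBT encoding; the register-format VIDEO_MODE_* values are now derived where the register is written instead. Assuming that encoding (hypothetical restatement of the constants):

/* Sketch of the VBT video_transfer_mode encoding assumed by the copy. */
enum {
	SKETCH_NON_BURST_SYNC_PULSE  = 1,
	SKETCH_NON_BURST_SYNC_EVENTS = 2,
	SKETCH_BURST_MODE            = 3,
};
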
diff --git a/drivers/gpu/drm/i915/display/intel_fb_pin.c b/drivers/gpu/drm/i915/display/intel_fb_pin.c
index 31c15e5fca956339933928ce8223640e538c63e0..a307b4993bcf3ac276e5a81aecac9e9a4d8a1120 100644
--- a/drivers/gpu/drm/i915/display/intel_fb_pin.c
+++ b/drivers/gpu/drm/i915/display/intel_fb_pin.c
@@ -7,6 +7,7 @@
  * DOC: display pinning helpers
  */
 
+#include "gem/i915_gem_domain.h"
 #include "gem/i915_gem_object.h"
 
 #include "i915_drv.h"
@@ -36,7 +37,11 @@ intel_pin_fb_obj_dpt(struct drm_framebuffer *fb,
 
 	atomic_inc(&dev_priv->gpu_error.pending_fb_pin);
 
-	ret = i915_gem_object_set_cache_level(obj, I915_CACHE_NONE);
+	ret = i915_gem_object_lock_interruptible(obj, NULL);
+	if (!ret) {
+		ret = i915_gem_object_set_cache_level(obj, I915_CACHE_NONE);
+		i915_gem_object_unlock(obj);
+	}
 	if (ret) {
 		vma = ERR_PTR(ret);
 		goto err;
@@ -47,7 +52,7 @@ intel_pin_fb_obj_dpt(struct drm_framebuffer *fb,
 		goto err;
 
 	if (i915_vma_misplaced(vma, 0, alignment, 0)) {
-		ret = i915_vma_unbind(vma);
+		ret = i915_vma_unbind_unlocked(vma);
 		if (ret) {
 			vma = ERR_PTR(ret);
 			goto err;
diff --git a/drivers/gpu/drm/i915/display/intel_fbc.c b/drivers/gpu/drm/i915/display/intel_fbc.c
index 465dc4e97ea8c8e0f69d6321b56c3c000d785c16..87f4af3fd523ec0d1a9a2191aeec6bbd422c43db 100644
--- a/drivers/gpu/drm/i915/display/intel_fbc.c
+++ b/drivers/gpu/drm/i915/display/intel_fbc.c
@@ -605,7 +605,7 @@ static void ivb_fbc_activate(struct intel_fbc *fbc)
 	else if (DISPLAY_VER(i915) == 9)
 		skl_fbc_program_cfb_stride(fbc);
 
-	if (i915->ggtt.num_fences)
+	if (to_gt(i915)->ggtt->num_fences)
 		snb_fbc_program_fence(fbc);
 
 	intel_de_write(i915, ILK_DPFC_CONTROL(fbc->id),
@@ -1125,7 +1125,8 @@ static int intel_fbc_check_plane(struct intel_atomic_state *state,
 
 	/* Wa_22010751166: icl, ehl, tgl, dg1, rkl */
 	if (DISPLAY_VER(i915) >= 11 &&
-	    (plane_state->view.color_plane[0].y + drm_rect_height(&plane_state->uapi.src)) & 3) {
+	    (plane_state->view.color_plane[0].y +
+	     (drm_rect_height(&plane_state->uapi.src) >> 16)) & 3) {
 		plane_state->no_fbc_reason = "plane end Y offset misaligned";
 		return false;
 	}
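
The FBC fix above matters because plane_state->uapi.src is in 16.16 fixed point: drm_rect_height() on it returns height << 16, so the workaround was testing fractional bits. Shifting right by 16 first recovers whole lines. A worked example:

/* Sketch: src rectangles use 16.16 fixed-point coordinates. */
struct drm_rect src = DRM_RECT_INIT(0, 0, 1920 << 16, 1080 << 16);
int y = 2;					/* hypothetical plane Y offset */
int height = drm_rect_height(&src) >> 16;	/* 1080 whole lines */

/* Wa_22010751166: the plane end Y offset must be 4-line aligned */
bool misaligned = (y + height) & 3;		/* (2 + 1080) & 3 == 2, so yes */
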
diff --git a/drivers/gpu/drm/i915/display/intel_fbdev.c b/drivers/gpu/drm/i915/display/intel_fbdev.c
index adc3a81be9f72b4e0e50760abfe7d9a41a9ba933..fd5bc7acf08d4844134003251073fb4d83956352 100644
--- a/drivers/gpu/drm/i915/display/intel_fbdev.c
+++ b/drivers/gpu/drm/i915/display/intel_fbdev.c
@@ -50,6 +50,23 @@
 #include "intel_fbdev.h"
 #include "intel_frontbuffer.h"
 
+struct intel_fbdev {
+	struct drm_fb_helper helper;
+	struct intel_framebuffer *fb;
+	struct i915_vma *vma;
+	unsigned long vma_flags;
+	async_cookie_t cookie;
+	int preferred_bpp;
+
+	/* Whether or not fbdev hpd processing is temporarily suspended */
+	bool hpd_suspended: 1;
+	/* Set when a hotplug was received while HPD processing was suspended */
+	bool hpd_waiting: 1;
+
+	/* Protects hpd_suspended */
+	struct mutex hpd_lock;
+};
+
 static struct intel_frontbuffer *to_frontbuffer(struct intel_fbdev *ifbdev)
 {
 	return ifbdev->fb->frontbuffer;
@@ -180,7 +197,7 @@ static int intelfb_create(struct drm_fb_helper *helper,
 	struct drm_device *dev = helper->dev;
 	struct drm_i915_private *dev_priv = to_i915(dev);
 	struct pci_dev *pdev = to_pci_dev(dev_priv->drm.dev);
-	struct i915_ggtt *ggtt = &dev_priv->ggtt;
+	struct i915_ggtt *ggtt = to_gt(dev_priv)->ggtt;
 	const struct i915_ggtt_view view = {
 		.type = I915_GGTT_VIEW_NORMAL,
 	};
@@ -680,3 +697,11 @@ void intel_fbdev_restore_mode(struct drm_device *dev)
 	if (drm_fb_helper_restore_fbdev_mode_unlocked(&ifbdev->helper) == 0)
 		intel_fbdev_invalidate(ifbdev);
 }
+
+struct intel_framebuffer *intel_fbdev_framebuffer(struct intel_fbdev *fbdev)
+{
+	if (!fbdev || !fbdev->helper.fb)
+		return NULL;
+
+	return to_intel_framebuffer(fbdev->helper.fb);
+}
diff --git a/drivers/gpu/drm/i915/display/intel_fbdev.h b/drivers/gpu/drm/i915/display/intel_fbdev.h
index de7c84250eb51259a94a3dc1735bf3bdcbdda092..0e95e9472fa3ba8fad6845160d9b0623b1327be0 100644
--- a/drivers/gpu/drm/i915/display/intel_fbdev.h
+++ b/drivers/gpu/drm/i915/display/intel_fbdev.h
@@ -10,6 +10,8 @@
 
 struct drm_device;
 struct drm_i915_private;
+struct intel_fbdev;
+struct intel_framebuffer;
 
 #ifdef CONFIG_DRM_FBDEV_EMULATION
 int intel_fbdev_init(struct drm_device *dev);
@@ -19,6 +21,7 @@ void intel_fbdev_fini(struct drm_i915_private *dev_priv);
 void intel_fbdev_set_suspend(struct drm_device *dev, int state, bool synchronous);
 void intel_fbdev_output_poll_changed(struct drm_device *dev);
 void intel_fbdev_restore_mode(struct drm_device *dev);
+struct intel_framebuffer *intel_fbdev_framebuffer(struct intel_fbdev *fbdev);
 #else
 static inline int intel_fbdev_init(struct drm_device *dev)
 {
@@ -48,6 +51,10 @@ static inline void intel_fbdev_output_poll_changed(struct drm_device *dev)
 static inline void intel_fbdev_restore_mode(struct drm_device *dev)
 {
 }
+static inline struct intel_framebuffer *intel_fbdev_framebuffer(struct intel_fbdev *fbdev)
+{
+	return NULL;
+}
 #endif
 
 #endif /* __INTEL_FBDEV_H__ */
diff --git a/drivers/gpu/drm/i915/display/intel_fdi.c b/drivers/gpu/drm/i915/display/intel_fdi.c
index 3d6e229236010ea925a50c8ef64c55cfb0696762..4e4b43669b14d505c084881114c20bff2ec75893 100644
--- a/drivers/gpu/drm/i915/display/intel_fdi.c
+++ b/drivers/gpu/drm/i915/display/intel_fdi.c
@@ -10,6 +10,11 @@
 #include "intel_display_types.h"
 #include "intel_fdi.h"
 
+struct intel_fdi_funcs {
+	void (*fdi_link_train)(struct intel_crtc *crtc,
+			       const struct intel_crtc_state *crtc_state);
+};
+
 static void assert_fdi_tx(struct drm_i915_private *dev_priv,
 			  enum pipe pipe, bool state)
 {
diff --git a/drivers/gpu/drm/i915/display/intel_gmbus.c b/drivers/gpu/drm/i915/display/intel_gmbus.c
index 6ce8c10fe975cfcb6f9a75a2d55a7194da33f637..2fad03250661a9350e5111b050daefe1489b6b37 100644
--- a/drivers/gpu/drm/i915/display/intel_gmbus.c
+++ b/drivers/gpu/drm/i915/display/intel_gmbus.c
@@ -98,11 +98,21 @@ static const struct gmbus_pin gmbus_pins_dg1[] = {
 	[GMBUS_PIN_4_CNP] = { "dpd", GPIOE },
 };
 
+static const struct gmbus_pin gmbus_pins_dg2[] = {
+	[GMBUS_PIN_1_BXT] = { "dpa", GPIOB },
+	[GMBUS_PIN_2_BXT] = { "dpb", GPIOC },
+	[GMBUS_PIN_3_BXT] = { "dpc", GPIOD },
+	[GMBUS_PIN_4_CNP] = { "dpd", GPIOE },
+	[GMBUS_PIN_9_TC1_ICP] = { "tc1", GPIOJ },
+};
+
 /* pin is expected to be valid */
 static const struct gmbus_pin *get_gmbus_pin(struct drm_i915_private *dev_priv,
 					     unsigned int pin)
 {
-	if (INTEL_PCH_TYPE(dev_priv) >= PCH_DG1)
+	if (INTEL_PCH_TYPE(dev_priv) >= PCH_DG2)
+		return &gmbus_pins_dg2[pin];
+	else if (INTEL_PCH_TYPE(dev_priv) >= PCH_DG1)
 		return &gmbus_pins_dg1[pin];
 	else if (INTEL_PCH_TYPE(dev_priv) >= PCH_ICP)
 		return &gmbus_pins_icp[pin];
@@ -123,7 +133,9 @@ bool intel_gmbus_is_valid_pin(struct drm_i915_private *dev_priv,
 {
 	unsigned int size;
 
-	if (INTEL_PCH_TYPE(dev_priv) >= PCH_DG1)
+	if (INTEL_PCH_TYPE(dev_priv) >= PCH_DG2)
+		size = ARRAY_SIZE(gmbus_pins_dg2);
+	else if (INTEL_PCH_TYPE(dev_priv) >= PCH_DG1)
 		size = ARRAY_SIZE(gmbus_pins_dg1);
 	else if (INTEL_PCH_TYPE(dev_priv) >= PCH_ICP)
 		size = ARRAY_SIZE(gmbus_pins_icp);
diff --git a/drivers/gpu/drm/i915/display/intel_hdmi.c b/drivers/gpu/drm/i915/display/intel_hdmi.c
index 6c72f85872407928dbf5c5df5bed32903b81870d..1aa5bdc7b0dcf0881cb847f23ffec0eede709073 100644
--- a/drivers/gpu/drm/i915/display/intel_hdmi.c
+++ b/drivers/gpu/drm/i915/display/intel_hdmi.c
@@ -1869,7 +1869,7 @@ hdmi_port_clock_valid(struct intel_hdmi *hdmi,
 	return MODE_OK;
 }
 
-static int intel_hdmi_tmds_clock(int clock, int bpc, bool ycbcr420_output)
+int intel_hdmi_tmds_clock(int clock, int bpc, bool ycbcr420_output)
 {
 	/* YCBCR420 TMDS rate requirement is half the pixel clock */
 	if (ycbcr420_output)
@@ -1935,25 +1935,30 @@ intel_hdmi_mode_clock_valid(struct drm_connector *connector, int clock,
 {
 	struct drm_i915_private *i915 = to_i915(connector->dev);
 	struct intel_hdmi *hdmi = intel_attached_hdmi(to_intel_connector(connector));
-	enum drm_mode_status status;
+	enum drm_mode_status status = MODE_OK;
+	int bpc;
+
+	/*
+	 * Try all color depths since valid port clock range
+	 * can have holes. Any mode that can be used with at
+	 * least one color depth is accepted.
+	 */
+	for (bpc = 12; bpc >= 8; bpc -= 2) {
+		int tmds_clock = intel_hdmi_tmds_clock(clock, bpc, ycbcr420_output);
+
+		if (!intel_hdmi_source_bpc_possible(i915, bpc))
+			continue;
+
+		if (!intel_hdmi_sink_bpc_possible(connector, bpc, has_hdmi_sink, ycbcr420_output))
+			continue;
+
+		status = hdmi_port_clock_valid(hdmi, tmds_clock, true, has_hdmi_sink);
+		if (status == MODE_OK)
+			return MODE_OK;
+	}
 
-	/* check if we can do 8bpc */
-	status = hdmi_port_clock_valid(hdmi, intel_hdmi_tmds_clock(clock, 8, ycbcr420_output),
-				       true, has_hdmi_sink);
-
-	/* if we can't do 8bpc we may still be able to do 12bpc */
-	if (status != MODE_OK &&
-	    intel_hdmi_source_bpc_possible(i915, 12) &&
-	    intel_hdmi_sink_bpc_possible(connector, 12, has_hdmi_sink, ycbcr420_output))
-		status = hdmi_port_clock_valid(hdmi, intel_hdmi_tmds_clock(clock, 12, ycbcr420_output),
-					       true, has_hdmi_sink);
-
-	/* if we can't do 8,12bpc we may still be able to do 10bpc */
-	if (status != MODE_OK &&
-	    intel_hdmi_source_bpc_possible(i915, 10) &&
-	    intel_hdmi_sink_bpc_possible(connector, 10, has_hdmi_sink, ycbcr420_output))
-		status = hdmi_port_clock_valid(hdmi, intel_hdmi_tmds_clock(clock, 10, ycbcr420_output),
-					       true, has_hdmi_sink);
+	/*
+	 * Can never happen: 8 bpc is always possible for both source and
+	 * sink, so the loop above must have recorded a failing status.
+	 */
+	drm_WARN_ON(&i915->drm, status == MODE_OK);
 
 	return status;
 }
diff --git a/drivers/gpu/drm/i915/display/intel_hdmi.h b/drivers/gpu/drm/i915/display/intel_hdmi.h
index b0804b862a898cfb320c9a429b898d50954dc4aa..93f65a917c368c4e80931c310bebd68c4e85cb81 100644
--- a/drivers/gpu/drm/i915/display/intel_hdmi.h
+++ b/drivers/gpu/drm/i915/display/intel_hdmi.h
@@ -46,6 +46,7 @@ bool intel_hdmi_limited_color_range(const struct intel_crtc_state *crtc_state,
 				    const struct drm_connector_state *conn_state);
 bool intel_hdmi_bpc_possible(const struct intel_crtc_state *crtc_state,
 			     int bpc, bool has_hdmi_sink, bool ycbcr420_output);
+int intel_hdmi_tmds_clock(int clock, int bpc, bool ycbcr420_output);
 int intel_hdmi_dsc_get_bpp(int src_fractional_bpp, int slice_width,
 			   int num_slices, int output_format, bool hdmi_all_bpp,
 			   int hdmi_max_chunk_bytes);
diff --git a/drivers/gpu/drm/i915/display/intel_hotplug.c b/drivers/gpu/drm/i915/display/intel_hotplug.c
index 912b7003dcfa1538f8f4211dd656961d9608d155..8204126d17f9cefc52332378de59ff380589aecb 100644
--- a/drivers/gpu/drm/i915/display/intel_hotplug.c
+++ b/drivers/gpu/drm/i915/display/intel_hotplug.c
@@ -24,6 +24,7 @@
 #include <linux/kernel.h>
 
 #include "i915_drv.h"
+#include "i915_irq.h"
 #include "intel_display_types.h"
 #include "intel_hotplug.h"
 
@@ -213,12 +214,6 @@ intel_hpd_irq_storm_switch_to_polling(struct drm_i915_private *dev_priv)
 	}
 }
 
-static void intel_hpd_irq_setup(struct drm_i915_private *i915)
-{
-	if (i915->display_irqs_enabled && i915->hotplug_funcs)
-		i915->hotplug_funcs->hpd_irq_setup(i915);
-}
-
 static void intel_hpd_irq_storm_reenable_work(struct work_struct *work)
 {
 	struct drm_i915_private *dev_priv =
diff --git a/drivers/gpu/drm/i915/display/intel_opregion.c b/drivers/gpu/drm/i915/display/intel_opregion.c
index af9d30f56cc1a5e8905f175f1837f69b67ef138e..f31e8c3f8ce0e6ac2a6213bd4539d02aa50f284a 100644
--- a/drivers/gpu/drm/i915/display/intel_opregion.c
+++ b/drivers/gpu/drm/i915/display/intel_opregion.c
@@ -47,10 +47,11 @@
 #define OPREGION_ASLE_EXT_OFFSET	0x1C00
 
 #define OPREGION_SIGNATURE "IntelGraphicsMem"
-#define MBOX_ACPI      (1<<0)
-#define MBOX_SWSCI     (1<<1)
-#define MBOX_ASLE      (1<<2)
-#define MBOX_ASLE_EXT  (1<<4)
+#define MBOX_ACPI		BIT(0)	/* Mailbox #1 */
+#define MBOX_SWSCI		BIT(1)	/* Mailbox #2 (obsolete from v2.x) */
+#define MBOX_ASLE		BIT(2)	/* Mailbox #3 */
+#define MBOX_ASLE_EXT		BIT(4)	/* Mailbox #5 */
+#define MBOX_BACKLIGHT		BIT(5)	/* Mailbox #2 (valid from v3.x) */
 
 struct opregion_header {
 	u8 signature[16];
@@ -245,14 +246,10 @@ struct opregion_asle_ext {
 
 #define MAX_DSLP	1500
 
-static int swsci(struct drm_i915_private *dev_priv,
-		 u32 function, u32 parm, u32 *parm_out)
+static int check_swsci_function(struct drm_i915_private *i915, u32 function)
 {
-	struct opregion_swsci *swsci = dev_priv->opregion.swsci;
-	struct pci_dev *pdev = to_pci_dev(dev_priv->drm.dev);
-	u32 main_function, sub_function, scic;
-	u16 swsci_val;
-	u32 dslp;
+	struct opregion_swsci *swsci = i915->opregion.swsci;
+	u32 main_function, sub_function;
 
 	if (!swsci)
 		return -ENODEV;
@@ -264,15 +261,31 @@ static int swsci(struct drm_i915_private *dev_priv,
 
 	/* Check if we can call the function. See swsci_setup for details. */
 	if (main_function == SWSCI_SBCB) {
-		if ((dev_priv->opregion.swsci_sbcb_sub_functions &
+		if ((i915->opregion.swsci_sbcb_sub_functions &
 		     (1 << sub_function)) == 0)
 			return -EINVAL;
 	} else if (main_function == SWSCI_GBDA) {
-		if ((dev_priv->opregion.swsci_gbda_sub_functions &
+		if ((i915->opregion.swsci_gbda_sub_functions &
 		     (1 << sub_function)) == 0)
 			return -EINVAL;
 	}
 
+	return 0;
+}
+
+static int swsci(struct drm_i915_private *dev_priv,
+		 u32 function, u32 parm, u32 *parm_out)
+{
+	struct opregion_swsci *swsci = dev_priv->opregion.swsci;
+	struct pci_dev *pdev = to_pci_dev(dev_priv->drm.dev);
+	u32 scic, dslp;
+	u16 swsci_val;
+	int ret;
+
+	ret = check_swsci_function(dev_priv, function);
+	if (ret)
+		return ret;
+
 	/* Driver sleep timeout in ms. */
 	dslp = swsci->dslp;
 	if (!dslp) {
@@ -346,11 +359,17 @@ int intel_opregion_notify_encoder(struct intel_encoder *intel_encoder,
 	u32 parm = 0;
 	u32 type = 0;
 	u32 port;
+	int ret;
 
 	/* don't care about old stuff for now */
 	if (!HAS_DDI(dev_priv))
 		return 0;
 
+	/* Avoid port out of bounds checks if SWSCI isn't there. */
+	ret = check_swsci_function(dev_priv, SWSCI_SBCB_DISPLAY_POWER_STATE);
+	if (ret)
+		return ret;
+
 	if (intel_encoder->type == INTEL_OUTPUT_DSI)
 		port = 0;
 	else
@@ -363,6 +382,21 @@ int intel_opregion_notify_encoder(struct intel_encoder *intel_encoder,
 		port++;
 	}
 
+	/*
+	 * The port numbering and mapping here is bizarre. The now-obsolete
+	 * swsci spec supports ports numbered [0..4]. Port E is handled as a
+	 * special case, but port F and beyond are not. The functionality is
+	 * supposed to be obsolete for new platforms. Just bail out if the port
+	 * number is out of bounds after mapping.
+	 */
+	if (port > 4) {
+		drm_dbg_kms(&dev_priv->drm,
+			    "[ENCODER:%d:%s] port %c (index %u) out of bounds for display power state notification\n",
+			    intel_encoder->base.base.id, intel_encoder->base.name,
+			    port_name(intel_encoder->port), port);
+		return -EINVAL;
+	}
+
 	if (!enable)
 		parm |= 4 << 8;
 
@@ -899,9 +933,17 @@ int intel_opregion_setup(struct drm_i915_private *dev_priv)
 	}
 
 	if (mboxes & MBOX_SWSCI) {
-		drm_dbg(&dev_priv->drm, "SWSCI supported\n");
-		opregion->swsci = base + OPREGION_SWSCI_OFFSET;
-		swsci_setup(dev_priv);
+		u8 major = opregion->header->over.major;
+
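+		/*
+		 * Mailbox #2 carries SWSCI only up to opregion v2.x; from
+		 * v3.x the same offset is defined as the backlight mailbox.
+		 */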
+		if (major >= 3) {
+			drm_err(&dev_priv->drm, "SWSCI Mailbox #2 present for opregion v3.x, ignoring\n");
+		} else {
+			if (major >= 2)
+				drm_dbg(&dev_priv->drm, "SWSCI Mailbox #2 present for opregion v2.x\n");
+			drm_dbg(&dev_priv->drm, "SWSCI supported\n");
+			opregion->swsci = base + OPREGION_SWSCI_OFFSET;
+			swsci_setup(dev_priv);
+		}
 	}
 
 	if (mboxes & MBOX_ASLE) {
@@ -916,6 +958,10 @@ int intel_opregion_setup(struct drm_i915_private *dev_priv)
 		opregion->asle_ext = base + OPREGION_ASLE_EXT_OFFSET;
 	}
 
+	if (mboxes & MBOX_BACKLIGHT)
+		drm_dbg(&dev_priv->drm, "Mailbox #2 for backlight present\n");
+
 	if (intel_load_vbt_firmware(dev_priv) == 0)
 		goto out;
 
diff --git a/drivers/gpu/drm/i915/display/intel_overlay.c b/drivers/gpu/drm/i915/display/intel_overlay.c
index 5358f03b52db5f188f3ff100198d2b0623f100be..76845d34ad0c316fff35e125e9bf30f1a56892bf 100644
--- a/drivers/gpu/drm/i915/display/intel_overlay.c
+++ b/drivers/gpu/drm/i915/display/intel_overlay.c
@@ -28,6 +28,7 @@
 
 #include <drm/drm_fourcc.h>
 
+#include "gem/i915_gem_internal.h"
 #include "gem/i915_gem_pm.h"
 #include "gt/intel_gpu_commands.h"
 #include "gt/intel_ring.h"
diff --git a/drivers/gpu/drm/i915/display/intel_plane_initial.c b/drivers/gpu/drm/i915/display/intel_plane_initial.c
index 01ce1d72297f1ba35fc86021d4a6d4e43cfef309..d7b1de4cc205ef3407d89004b25a61c732dbefad 100644
--- a/drivers/gpu/drm/i915/display/intel_plane_initial.c
+++ b/drivers/gpu/drm/i915/display/intel_plane_initial.c
@@ -46,17 +46,18 @@ static struct i915_vma *
 initial_plane_vma(struct drm_i915_private *i915,
 		  struct intel_initial_plane_config *plane_config)
 {
+	struct intel_memory_region *mem = i915->mm.stolen_region;
 	struct drm_i915_gem_object *obj;
 	struct i915_vma *vma;
 	u32 base, size;
 
-	if (plane_config->size == 0)
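+	/* The BIOS framebuffer lives in stolen memory, which may be absent. */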
+	if (!mem || plane_config->size == 0)
 		return NULL;
 
 	base = round_down(plane_config->base,
 			  I915_GTT_MIN_ALIGNMENT);
 	size = round_up(plane_config->base + plane_config->size,
-			I915_GTT_MIN_ALIGNMENT);
+			mem->min_page_size);
 	size -= base;
 
 	/*
@@ -94,7 +95,7 @@ initial_plane_vma(struct drm_i915_private *i915,
 		goto err_obj;
 	}
 
-	vma = i915_vma_instance(obj, &i915->ggtt.vm, NULL);
+	vma = i915_vma_instance(obj, &to_gt(i915)->ggtt->vm, NULL);
 	if (IS_ERR(vma))
 		goto err_obj;
 
@@ -165,8 +166,6 @@ intel_find_initial_plane_obj(struct intel_crtc *crtc,
 {
 	struct drm_device *dev = crtc->base.dev;
 	struct drm_i915_private *dev_priv = to_i915(dev);
-	struct intel_crtc_state *crtc_state =
-		to_intel_crtc_state(crtc->base.state);
 	struct intel_plane *plane =
 		to_intel_plane(crtc->base.primary);
 	struct intel_plane_state *plane_state =
@@ -203,11 +202,6 @@ intel_find_initial_plane_obj(struct intel_crtc *crtc,
 	 * pretend the BIOS never had it enabled.
 	 */
 	intel_plane_disable_noatomic(crtc, plane);
-	if (crtc_state->bigjoiner) {
-		struct intel_crtc *slave =
-			crtc_state->bigjoiner_linked_crtc;
-		intel_plane_disable_noatomic(slave, to_intel_plane(slave->base.primary));
-	}
 
 	return;
 
diff --git a/drivers/gpu/drm/i915/display/intel_psr.c b/drivers/gpu/drm/i915/display/intel_psr.c
index a1a663f362e7d5fef9c001ba5999b1fef628fc2e..2e0b092f4b6be52ca0ed5cfb73a4aaeb90a5439f 100644
--- a/drivers/gpu/drm/i915/display/intel_psr.c
+++ b/drivers/gpu/drm/i915/display/intel_psr.c
@@ -1063,31 +1063,28 @@ static void intel_psr_activate(struct intel_dp *intel_dp)
 	intel_dp->psr.active = true;
 }
 
-static void intel_psr_enable_source(struct intel_dp *intel_dp)
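+/* Map the pipe doing PSR to its LATENCY_REPORTING_REMOVED bit in GEN8_CHICKEN_DCPR_1 */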
+static u32 wa_16013835468_bit_get(struct intel_dp *intel_dp)
+{
+	switch (intel_dp->psr.pipe) {
+	case PIPE_A:
+		return LATENCY_REPORTING_REMOVED_PIPE_A;
+	case PIPE_B:
+		return LATENCY_REPORTING_REMOVED_PIPE_B;
+	case PIPE_C:
+		return LATENCY_REPORTING_REMOVED_PIPE_C;
+	default:
+		MISSING_CASE(intel_dp->psr.pipe);
+		return 0;
+	}
+}
+
+static void intel_psr_enable_source(struct intel_dp *intel_dp,
+				    const struct intel_crtc_state *crtc_state)
 {
 	struct drm_i915_private *dev_priv = dp_to_i915(intel_dp);
 	enum transcoder cpu_transcoder = intel_dp->psr.transcoder;
 	u32 mask;
 
-	if (intel_dp->psr.psr2_enabled && DISPLAY_VER(dev_priv) == 9) {
-		i915_reg_t reg = CHICKEN_TRANS(cpu_transcoder);
-		u32 chicken = intel_de_read(dev_priv, reg);
-
-		chicken |= PSR2_VSC_ENABLE_PROG_HEADER |
-			   PSR2_ADD_VERTICAL_LINE_COUNT;
-		intel_de_write(dev_priv, reg, chicken);
-	}
-
-	/*
-	 * Wa_16014451276:adlp
-	 * All supported adlp panels have 1-based X granularity, this may
-	 * cause issues if non-supported panels are used.
-	 */
-	if (IS_ALDERLAKE_P(dev_priv) &&
-	    intel_dp->psr.psr2_enabled)
-		intel_de_rmw(dev_priv, CHICKEN_TRANS(cpu_transcoder), 0,
-			     ADLP_1_BASED_X_GRANULARITY);
-
 	/*
 	 * Per Spec: Avoid continuous PSR exit by masking MEMUP and HPD also
 	 * mask LPSP to avoid dependency on other drivers that might block
@@ -1126,18 +1123,47 @@ static void intel_psr_enable_source(struct intel_dp *intel_dp)
 			     intel_dp->psr.psr2_sel_fetch_enabled ?
 			     IGNORE_PSR2_HW_TRACKING : 0);
 
-	/* Wa_16011168373:adl-p */
-	if (IS_ADLP_DISPLAY_STEP(dev_priv, STEP_A0, STEP_B0) &&
-	    intel_dp->psr.psr2_enabled)
-		intel_de_rmw(dev_priv,
-			     TRANS_SET_CONTEXT_LATENCY(intel_dp->psr.transcoder),
-			     TRANS_SET_CONTEXT_LATENCY_MASK,
-			     TRANS_SET_CONTEXT_LATENCY_VALUE(1));
+	if (intel_dp->psr.psr2_enabled) {
+		if (DISPLAY_VER(dev_priv) == 9)
+			intel_de_rmw(dev_priv, CHICKEN_TRANS(cpu_transcoder), 0,
+				     PSR2_VSC_ENABLE_PROG_HEADER |
+				     PSR2_ADD_VERTICAL_LINE_COUNT);
 
-	/* Wa_16012604467:adlp */
-	if (IS_ALDERLAKE_P(dev_priv) && intel_dp->psr.psr2_enabled)
-		intel_de_rmw(dev_priv, CLKGATE_DIS_MISC, 0,
-			     CLKGATE_DIS_MISC_DMASC_GATING_DIS);
+		/*
+		 * Wa_16014451276:adlp
+		 * All supported adlp panels have 1-based X granularity; this may
+		 * cause issues if unsupported panels are used.
+		 */
+		if (IS_ALDERLAKE_P(dev_priv))
+			intel_de_rmw(dev_priv, CHICKEN_TRANS(cpu_transcoder), 0,
+				     ADLP_1_BASED_X_GRANULARITY);
+
+		/* Wa_16011168373:adl-p */
+		if (IS_ADLP_DISPLAY_STEP(dev_priv, STEP_A0, STEP_B0))
+			intel_de_rmw(dev_priv,
+				     TRANS_SET_CONTEXT_LATENCY(intel_dp->psr.transcoder),
+				     TRANS_SET_CONTEXT_LATENCY_MASK,
+				     TRANS_SET_CONTEXT_LATENCY_VALUE(1));
+
+		/* Wa_16012604467:adlp */
+		if (IS_ALDERLAKE_P(dev_priv))
+			intel_de_rmw(dev_priv, CLKGATE_DIS_MISC, 0,
+				     CLKGATE_DIS_MISC_DMASC_GATING_DIS);
+
+		/* Wa_16013835468:tgl[b0+], dg1 */
+		if (IS_TGL_DISPLAY_STEP(dev_priv, STEP_B0, STEP_FOREVER) ||
+		    IS_DG1(dev_priv)) {
+			u16 vtotal, vblank;
+
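+			/*
+			 * Here "vtotal" is the vtotal - vdisplay delta, i.e.
+			 * the length of the full vertical blanking period.
+			 */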
+			vtotal = crtc_state->uapi.adjusted_mode.crtc_vtotal -
+				 crtc_state->uapi.adjusted_mode.crtc_vdisplay;
+			vblank = crtc_state->uapi.adjusted_mode.crtc_vblank_end -
+				 crtc_state->uapi.adjusted_mode.crtc_vblank_start;
+			if (vblank > vtotal)
+				intel_de_rmw(dev_priv, GEN8_CHICKEN_DCPR_1, 0,
+					     wa_16013835468_bit_get(intel_dp));
+		}
+	}
 }
 
 static bool psr_interrupt_error_check(struct intel_dp *intel_dp)
@@ -1202,7 +1228,7 @@ static void intel_psr_enable_locked(struct intel_dp *intel_dp,
 	intel_write_dp_vsc_sdp(encoder, crtc_state, &crtc_state->psr_vsc);
 	intel_snps_phy_update_psr_power_state(dev_priv, phy, true);
 	intel_psr_enable_sink(intel_dp);
-	intel_psr_enable_source(intel_dp);
+	intel_psr_enable_source(intel_dp, crtc_state);
 	intel_dp->psr.enabled = true;
 	intel_dp->psr.paused = false;
 
@@ -1290,17 +1316,24 @@ static void intel_psr_disable_locked(struct intel_dp *intel_dp)
 		intel_de_rmw(dev_priv, CHICKEN_PAR1_1,
 			     DIS_RAM_BYPASS_PSR2_MAN_TRACK, 0);
 
-	/* Wa_16011168373:adl-p */
-	if (IS_ADLP_DISPLAY_STEP(dev_priv, STEP_A0, STEP_B0) &&
-	    intel_dp->psr.psr2_enabled)
-		intel_de_rmw(dev_priv,
-			     TRANS_SET_CONTEXT_LATENCY(intel_dp->psr.transcoder),
-			     TRANS_SET_CONTEXT_LATENCY_MASK, 0);
-
-	/* Wa_16012604467:adlp */
-	if (IS_ALDERLAKE_P(dev_priv) && intel_dp->psr.psr2_enabled)
-		intel_de_rmw(dev_priv, CLKGATE_DIS_MISC,
-			     CLKGATE_DIS_MISC_DMASC_GATING_DIS, 0);
+	if (intel_dp->psr.psr2_enabled) {
+		/* Wa_16011168373:adl-p */
+		if (IS_ADLP_DISPLAY_STEP(dev_priv, STEP_A0, STEP_B0))
+			intel_de_rmw(dev_priv,
+				     TRANS_SET_CONTEXT_LATENCY(intel_dp->psr.transcoder),
+				     TRANS_SET_CONTEXT_LATENCY_MASK, 0);
+
+		/* Wa_16012604467:adlp */
+		if (IS_ALDERLAKE_P(dev_priv))
+			intel_de_rmw(dev_priv, CLKGATE_DIS_MISC,
+				     CLKGATE_DIS_MISC_DMASC_GATING_DIS, 0);
+
+		/* Wa_16013835468:tgl[b0+], dg1 */
+		if (IS_TGL_DISPLAY_STEP(dev_priv, STEP_B0, STEP_FOREVER) ||
+		    IS_DG1(dev_priv))
+			intel_de_rmw(dev_priv, GEN8_CHICKEN_DCPR_1,
+				     wa_16013835468_bit_get(intel_dp), 0);
+	}
 
 	intel_snps_phy_update_psr_power_state(dev_priv, phy, false);
 
diff --git a/drivers/gpu/drm/i915/display/intel_snps_phy.c b/drivers/gpu/drm/i915/display/intel_snps_phy.c
index 8573a458811a0e936a0421619e7e5f818d228e65..7e6245b97fedab38898fa795a35d44411decfb83 100644
--- a/drivers/gpu/drm/i915/display/intel_snps_phy.c
+++ b/drivers/gpu/drm/i915/display/intel_snps_phy.c
@@ -32,10 +32,10 @@ void intel_snps_phy_wait_for_calibration(struct drm_i915_private *i915)
 		if (!intel_phy_is_snps(i915, phy))
 			continue;
 
-		if (intel_de_wait_for_clear(i915, ICL_PHY_MISC(phy),
+		if (intel_de_wait_for_clear(i915, DG2_PHY_MISC(phy),
 					    DG2_PHY_DP_TX_ACK_MASK, 25))
 			drm_err(&i915->drm, "SNPS PHY %c failed to calibrate after 25ms.\n",
-				phy);
+				phy_name(phy));
 	}
 }
 
@@ -250,197 +250,6 @@ static const struct intel_mpllb_state * const dg2_dp_100_tables[] = {
 	NULL,
 };
 
-/*
- * Basic DP link rates with 38.4 MHz reference clock.
- */
-
-static const struct intel_mpllb_state dg2_dp_rbr_38_4 = {
-	.clock = 162000,
-	.ref_control =
-		REG_FIELD_PREP(SNPS_PHY_REF_CONTROL_REF_RANGE, 1),
-	.mpllb_cp =
-		REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_INT, 5) |
-		REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_PROP, 25) |
-		REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_INT_GS, 65) |
-		REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_PROP_GS, 127),
-	.mpllb_div =
-		REG_FIELD_PREP(SNPS_PHY_MPLLB_DIV5_CLK_EN, 1) |
-		REG_FIELD_PREP(SNPS_PHY_MPLLB_TX_CLK_DIV, 2) |
-		REG_FIELD_PREP(SNPS_PHY_MPLLB_PMIX_EN, 1) |
-		REG_FIELD_PREP(SNPS_PHY_MPLLB_V2I, 2) |
-		REG_FIELD_PREP(SNPS_PHY_MPLLB_FREQ_VCO, 2),
-	.mpllb_div2 =
-		REG_FIELD_PREP(SNPS_PHY_MPLLB_REF_CLK_DIV, 1) |
-		REG_FIELD_PREP(SNPS_PHY_MPLLB_MULTIPLIER, 304),
-	.mpllb_fracn1 =
-		REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_CGG_UPDATE_EN, 1) |
-		REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_EN, 1) |
-		REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_DEN, 1),
-	.mpllb_fracn2 =
-		REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_QUOT, 49152),
-};
-
-static const struct intel_mpllb_state dg2_dp_hbr1_38_4 = {
-	.clock = 270000,
-	.ref_control =
-		REG_FIELD_PREP(SNPS_PHY_REF_CONTROL_REF_RANGE, 1),
-	.mpllb_cp =
-		REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_INT, 5) |
-		REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_PROP, 25) |
-		REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_INT_GS, 65) |
-		REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_PROP_GS, 127),
-	.mpllb_div =
-		REG_FIELD_PREP(SNPS_PHY_MPLLB_DIV5_CLK_EN, 1) |
-		REG_FIELD_PREP(SNPS_PHY_MPLLB_TX_CLK_DIV, 1) |
-		REG_FIELD_PREP(SNPS_PHY_MPLLB_PMIX_EN, 1) |
-		REG_FIELD_PREP(SNPS_PHY_MPLLB_V2I, 2) |
-		REG_FIELD_PREP(SNPS_PHY_MPLLB_FREQ_VCO, 3),
-	.mpllb_div2 =
-		REG_FIELD_PREP(SNPS_PHY_MPLLB_REF_CLK_DIV, 1) |
-		REG_FIELD_PREP(SNPS_PHY_MPLLB_MULTIPLIER, 248),
-	.mpllb_fracn1 =
-		REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_CGG_UPDATE_EN, 1) |
-		REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_EN, 1) |
-		REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_DEN, 1),
-	.mpllb_fracn2 =
-		REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_QUOT, 40960),
-};
-
-static const struct intel_mpllb_state dg2_dp_hbr2_38_4 = {
-	.clock = 540000,
-	.ref_control =
-		REG_FIELD_PREP(SNPS_PHY_REF_CONTROL_REF_RANGE, 1),
-	.mpllb_cp =
-		REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_INT, 5) |
-		REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_PROP, 25) |
-		REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_INT_GS, 65) |
-		REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_PROP_GS, 127),
-	.mpllb_div =
-		REG_FIELD_PREP(SNPS_PHY_MPLLB_DIV5_CLK_EN, 1) |
-		REG_FIELD_PREP(SNPS_PHY_MPLLB_PMIX_EN, 1) |
-		REG_FIELD_PREP(SNPS_PHY_MPLLB_V2I, 2) |
-		REG_FIELD_PREP(SNPS_PHY_MPLLB_FREQ_VCO, 3),
-	.mpllb_div2 =
-		REG_FIELD_PREP(SNPS_PHY_MPLLB_REF_CLK_DIV, 1) |
-		REG_FIELD_PREP(SNPS_PHY_MPLLB_MULTIPLIER, 248),
-	.mpllb_fracn1 =
-		REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_CGG_UPDATE_EN, 1) |
-		REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_EN, 1) |
-		REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_DEN, 1),
-	.mpllb_fracn2 =
-		REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_QUOT, 40960),
-};
-
-static const struct intel_mpllb_state dg2_dp_hbr3_38_4 = {
-	.clock = 810000,
-	.ref_control =
-		REG_FIELD_PREP(SNPS_PHY_REF_CONTROL_REF_RANGE, 1),
-	.mpllb_cp =
-		REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_INT, 6) |
-		REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_PROP, 26) |
-		REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_INT_GS, 65) |
-		REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_PROP_GS, 127),
-	.mpllb_div =
-		REG_FIELD_PREP(SNPS_PHY_MPLLB_DIV5_CLK_EN, 1) |
-		REG_FIELD_PREP(SNPS_PHY_MPLLB_PMIX_EN, 1) |
-		REG_FIELD_PREP(SNPS_PHY_MPLLB_V2I, 2),
-	.mpllb_div2 =
-		REG_FIELD_PREP(SNPS_PHY_MPLLB_REF_CLK_DIV, 1) |
-		REG_FIELD_PREP(SNPS_PHY_MPLLB_MULTIPLIER, 388),
-	.mpllb_fracn1 =
-		REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_CGG_UPDATE_EN, 1) |
-		REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_EN, 1) |
-		REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_DEN, 1),
-	.mpllb_fracn2 =
-		REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_QUOT, 61440),
-};
-
-static const struct intel_mpllb_state dg2_dp_uhbr10_38_4 = {
-	.clock = 1000000,
-	.ref_control =
-		REG_FIELD_PREP(SNPS_PHY_REF_CONTROL_REF_RANGE, 1),
-	.mpllb_cp =
-		REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_INT, 5) |
-		REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_PROP, 26) |
-		REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_INT_GS, 65) |
-		REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_PROP_GS, 127),
-	.mpllb_div =
-		REG_FIELD_PREP(SNPS_PHY_MPLLB_DIV5_CLK_EN, 1) |
-		REG_FIELD_PREP(SNPS_PHY_MPLLB_DIV_CLK_EN, 1) |
-		REG_FIELD_PREP(SNPS_PHY_MPLLB_DIV_MULTIPLIER, 8) |
-		REG_FIELD_PREP(SNPS_PHY_MPLLB_PMIX_EN, 1) |
-		REG_FIELD_PREP(SNPS_PHY_MPLLB_WORD_DIV2_EN, 1) |
-		REG_FIELD_PREP(SNPS_PHY_MPLLB_DP2_MODE, 1) |
-		REG_FIELD_PREP(SNPS_PHY_MPLLB_SHIM_DIV32_CLK_SEL, 1) |
-		REG_FIELD_PREP(SNPS_PHY_MPLLB_V2I, 2),
-	.mpllb_div2 =
-		REG_FIELD_PREP(SNPS_PHY_MPLLB_REF_CLK_DIV, 1) |
-		REG_FIELD_PREP(SNPS_PHY_MPLLB_MULTIPLIER, 488),
-	.mpllb_fracn1 =
-		REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_CGG_UPDATE_EN, 1) |
-		REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_EN, 1) |
-		REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_DEN, 3),
-	.mpllb_fracn2 =
-		REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_REM, 2) |
-		REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_QUOT, 27306),
-
-	/*
-	 * SSC will be enabled, DP UHBR has a minimum SSC requirement.
-	 */
-	.mpllb_sscen =
-		REG_FIELD_PREP(SNPS_PHY_MPLLB_SSC_EN, 1) |
-		REG_FIELD_PREP(SNPS_PHY_MPLLB_SSC_PEAK, 76800),
-	.mpllb_sscstep =
-		REG_FIELD_PREP(SNPS_PHY_MPLLB_SSC_STEPSIZE, 129024),
-};
-
-static const struct intel_mpllb_state dg2_dp_uhbr13_38_4 = {
-	.clock = 1350000,
-	.ref_control =
-		REG_FIELD_PREP(SNPS_PHY_REF_CONTROL_REF_RANGE, 1),
-	.mpllb_cp =
-		REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_INT, 6) |
-		REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_PROP, 56) |
-		REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_INT_GS, 65) |
-		REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_PROP_GS, 127),
-	.mpllb_div =
-		REG_FIELD_PREP(SNPS_PHY_MPLLB_DIV5_CLK_EN, 1) |
-		REG_FIELD_PREP(SNPS_PHY_MPLLB_DIV_CLK_EN, 1) |
-		REG_FIELD_PREP(SNPS_PHY_MPLLB_DIV_MULTIPLIER, 8) |
-		REG_FIELD_PREP(SNPS_PHY_MPLLB_PMIX_EN, 1) |
-		REG_FIELD_PREP(SNPS_PHY_MPLLB_WORD_DIV2_EN, 1) |
-		REG_FIELD_PREP(SNPS_PHY_MPLLB_DP2_MODE, 1) |
-		REG_FIELD_PREP(SNPS_PHY_MPLLB_V2I, 3),
-	.mpllb_div2 =
-		REG_FIELD_PREP(SNPS_PHY_MPLLB_REF_CLK_DIV, 1) |
-		REG_FIELD_PREP(SNPS_PHY_MPLLB_MULTIPLIER, 670),
-	.mpllb_fracn1 =
-		REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_CGG_UPDATE_EN, 1) |
-		REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_EN, 1) |
-		REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_DEN, 1),
-	.mpllb_fracn2 =
-		REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_QUOT, 36864),
-
-	/*
-	 * SSC will be enabled, DP UHBR has a minimum SSC requirement.
-	 */
-	.mpllb_sscen =
-		REG_FIELD_PREP(SNPS_PHY_MPLLB_SSC_EN, 1) |
-		REG_FIELD_PREP(SNPS_PHY_MPLLB_SSC_PEAK, 103680),
-	.mpllb_sscstep =
-		REG_FIELD_PREP(SNPS_PHY_MPLLB_SSC_STEPSIZE, 174182),
-};
-
-static const struct intel_mpllb_state * const dg2_dp_38_4_tables[] = {
-	&dg2_dp_rbr_38_4,
-	&dg2_dp_hbr1_38_4,
-	&dg2_dp_hbr2_38_4,
-	&dg2_dp_hbr3_38_4,
-	&dg2_dp_uhbr10_38_4,
-	&dg2_dp_uhbr13_38_4,
-	NULL,
-};
-
 /*
  * eDP link rates with 100 MHz reference clock.
  */
@@ -749,22 +558,7 @@ intel_mpllb_tables_get(struct intel_crtc_state *crtc_state,
 	if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_EDP)) {
 		return dg2_edp_tables;
 	} else if (intel_crtc_has_dp_encoder(crtc_state)) {
-		/*
-		 * FIXME: Initially we're just enabling the "combo" outputs on
-		 * port A-D.  The MPLLB for those ports takes an input from the
-		 * "Display Filter PLL" which always has an output frequency
-		 * of 100 MHz, hence the use of the _100 tables below.
-		 *
-		 * Once we enable port TC1 it will either use the same 100 MHz
-		 * "Display Filter PLL" (when strapped to support a native
-		 * display connection) or different 38.4 MHz "Filter PLL" when
-		 * strapped to support a USB connection, so we'll need to check
-		 * that to determine which table to use.
-		 */
-		if (0)
-			return dg2_dp_38_4_tables;
-		else
-			return dg2_dp_100_tables;
+		return dg2_dp_100_tables;
 	} else if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_HDMI)) {
 		return dg2_hdmi_tables;
 	}
diff --git a/drivers/gpu/drm/i915/display/intel_tc.c b/drivers/gpu/drm/i915/display/intel_tc.c
index feead08ddf8fffce5d125f4d9a7e944bc29f8b28..fc037c027ea5aa28170dc92d1dce8342d80c4278 100644
--- a/drivers/gpu/drm/i915/display/intel_tc.c
+++ b/drivers/gpu/drm/i915/display/intel_tc.c
@@ -693,6 +693,8 @@ void intel_tc_port_sanitize(struct intel_digital_port *dig_port)
 {
 	struct drm_i915_private *i915 = to_i915(dig_port->base.base.dev);
 	struct intel_encoder *encoder = &dig_port->base;
+	intel_wakeref_t tc_cold_wref;
+	enum intel_display_power_domain domain;
 	int active_links = 0;
 
 	mutex_lock(&dig_port->tc_lock);
@@ -704,12 +706,11 @@ void intel_tc_port_sanitize(struct intel_digital_port *dig_port)
 
 	drm_WARN_ON(&i915->drm, dig_port->tc_mode != TC_PORT_DISCONNECTED);
 	drm_WARN_ON(&i915->drm, dig_port->tc_lock_wakeref);
-	if (active_links) {
-		enum intel_display_power_domain domain;
-		intel_wakeref_t tc_cold_wref = tc_cold_block(dig_port, &domain);
 
-		dig_port->tc_mode = intel_tc_port_get_current_mode(dig_port);
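+	/* Block TC-cold during the mode readout regardless of active links. */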
+	tc_cold_wref = tc_cold_block(dig_port, &domain);
 
+	dig_port->tc_mode = intel_tc_port_get_current_mode(dig_port);
+	if (active_links) {
 		if (!icl_tc_phy_is_connected(dig_port))
 			drm_dbg_kms(&i915->drm,
 				    "Port %s: PHY disconnected with %d active link(s)\n",
@@ -718,10 +719,23 @@ void intel_tc_port_sanitize(struct intel_digital_port *dig_port)
 
 		dig_port->tc_lock_wakeref = tc_cold_block(dig_port,
 							  &dig_port->tc_lock_power_domain);
-
-		tc_cold_unblock(dig_port, domain, tc_cold_wref);
+	} else {
+		/*
+		 * TBT-alt is the default mode whenever the PHY ownership is not
+		 * held (regardless of the sink's live state), so we can silently
+		 * switch from it to disconnected mode here.
+		 */
+		if (dig_port->tc_mode != TC_PORT_TBT_ALT)
+			drm_dbg_kms(&i915->drm,
+				    "Port %s: PHY left in %s mode on disabled port, disconnecting it\n",
+				    dig_port->tc_port_name,
+				    tc_port_mode_name(dig_port->tc_mode));
+		icl_tc_phy_disconnect(dig_port);
 	}
 
+	tc_cold_unblock(dig_port, domain, tc_cold_wref);
+
 	drm_dbg_kms(&i915->drm, "Port %s: sanitize mode (%s)\n",
 		    dig_port->tc_port_name,
 		    tc_port_mode_name(dig_port->tc_mode));
diff --git a/drivers/gpu/drm/i915/display/intel_vbt_defs.h b/drivers/gpu/drm/i915/display/intel_vbt_defs.h
index a39d6cfea87aab18d998f0c98dacc4d966ed00c2..b9397d9363c52c7cd1e5f380b22a54644e90a92e 100644
--- a/drivers/gpu/drm/i915/display/intel_vbt_defs.h
+++ b/drivers/gpu/drm/i915/display/intel_vbt_defs.h
@@ -162,6 +162,14 @@ struct bdb_general_features {
 	u8 dp_ssc_freq:1;	/* SSC freq for PCH attached eDP */
 	u8 dp_ssc_dongle_supported:1;
 	u8 rsvd11:2; /* finish byte */
+
+	/* bits 6 */
+	u8 tc_hpd_retry_timeout:7; /* 242 */
+	u8 rsvd12:1;
+
+	/* bits 7 */
+	u8 afc_startup_config:2; /* 249 */
+	u8 rsvd13:6;
 } __packed;
 
 /*
diff --git a/drivers/gpu/drm/i915/display/intel_vdsc.c b/drivers/gpu/drm/i915/display/intel_vdsc.c
index 3faea903b9ae5ae522102317673f5e6ab7445b36..545eff5bf158c37e8d0a92eac2b358b4571effa3 100644
--- a/drivers/gpu/drm/i915/display/intel_vdsc.c
+++ b/drivers/gpu/drm/i915/display/intel_vdsc.c
@@ -1107,18 +1107,6 @@ static i915_reg_t dss_ctl2_reg(struct intel_crtc *crtc, enum transcoder cpu_tran
 		ICL_PIPE_DSS_CTL2(crtc->pipe) : DSS_CTL2;
 }
 
-struct intel_crtc *
-intel_dsc_get_bigjoiner_secondary(const struct intel_crtc *primary_crtc)
-{
-	return intel_crtc_for_pipe(to_i915(primary_crtc->base.dev), primary_crtc->pipe + 1);
-}
-
-static struct intel_crtc *
-intel_dsc_get_bigjoiner_primary(const struct intel_crtc *secondary_crtc)
-{
-	return intel_crtc_for_pipe(to_i915(secondary_crtc->base.dev), secondary_crtc->pipe - 1);
-}
-
 void intel_uncompressed_joiner_enable(const struct intel_crtc_state *crtc_state)
 {
 	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
@@ -1126,7 +1114,7 @@ void intel_uncompressed_joiner_enable(const struct intel_crtc_state *crtc_state)
 	u32 dss_ctl1_val = 0;
 
 	if (crtc_state->bigjoiner && !crtc_state->dsc.compression_enable) {
-		if (crtc_state->bigjoiner_slave)
+		if (intel_crtc_is_bigjoiner_slave(crtc_state))
 			dss_ctl1_val |= UNCOMPRESSED_JOINER_SLAVE;
 		else
 			dss_ctl1_val |= UNCOMPRESSED_JOINER_MASTER;
@@ -1154,7 +1142,7 @@ void intel_dsc_enable(const struct intel_crtc_state *crtc_state)
 	}
 	if (crtc_state->bigjoiner) {
 		dss_ctl1_val |= BIG_JOINER_ENABLE;
-		if (!crtc_state->bigjoiner_slave)
+		if (!intel_crtc_is_bigjoiner_slave(crtc_state))
 			dss_ctl1_val |= MASTER_BIG_JOINER_ENABLE;
 	}
 	intel_de_write(dev_priv, dss_ctl1_reg(crtc, crtc_state->cpu_transcoder), dss_ctl1_val);
@@ -1174,25 +1162,6 @@ void intel_dsc_disable(const struct intel_crtc_state *old_crtc_state)
 	}
 }
 
-void intel_uncompressed_joiner_get_config(struct intel_crtc_state *crtc_state)
-{
-	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
-	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
-	u32 dss_ctl1;
-
-	dss_ctl1 = intel_de_read(dev_priv, dss_ctl1_reg(crtc, crtc_state->cpu_transcoder));
-	if (dss_ctl1 & UNCOMPRESSED_JOINER_MASTER) {
-		crtc_state->bigjoiner = true;
-		crtc_state->bigjoiner_linked_crtc = intel_dsc_get_bigjoiner_secondary(crtc);
-		drm_WARN_ON(&dev_priv->drm, !crtc_state->bigjoiner_linked_crtc);
-	} else if (dss_ctl1 & UNCOMPRESSED_JOINER_SLAVE) {
-		crtc_state->bigjoiner = true;
-		crtc_state->bigjoiner_slave = true;
-		crtc_state->bigjoiner_linked_crtc = intel_dsc_get_bigjoiner_primary(crtc);
-		drm_WARN_ON(&dev_priv->drm, !crtc_state->bigjoiner_linked_crtc);
-	}
-}
-
 void intel_dsc_get_config(struct intel_crtc_state *crtc_state)
 {
 	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
@@ -1223,18 +1192,6 @@ void intel_dsc_get_config(struct intel_crtc_state *crtc_state)
 	crtc_state->dsc.dsc_split = (dss_ctl2 & RIGHT_BRANCH_VDSC_ENABLE) &&
 		(dss_ctl1 & JOINER_ENABLE);
 
-	if (dss_ctl1 & BIG_JOINER_ENABLE) {
-		crtc_state->bigjoiner = true;
-
-		if (!(dss_ctl1 & MASTER_BIG_JOINER_ENABLE)) {
-			crtc_state->bigjoiner_slave = true;
-			crtc_state->bigjoiner_linked_crtc = intel_dsc_get_bigjoiner_primary(crtc);
-		} else {
-			crtc_state->bigjoiner_linked_crtc = intel_dsc_get_bigjoiner_secondary(crtc);
-		}
-		drm_WARN_ON(&dev_priv->drm, !crtc_state->bigjoiner_linked_crtc);
-	}
-
 	/* FIXME: add more state readout as needed */
 
 	/* PPS1 */
diff --git a/drivers/gpu/drm/i915/display/intel_vdsc.h b/drivers/gpu/drm/i915/display/intel_vdsc.h
index 4ec75f7159869baae7ab3d77aa9dff4ab3643322..8763f00fa7e252330fda152b815f3f6c82939624 100644
--- a/drivers/gpu/drm/i915/display/intel_vdsc.h
+++ b/drivers/gpu/drm/i915/display/intel_vdsc.h
@@ -18,7 +18,6 @@ void intel_uncompressed_joiner_enable(const struct intel_crtc_state *crtc_state)
 void intel_dsc_enable(const struct intel_crtc_state *crtc_state);
 void intel_dsc_disable(const struct intel_crtc_state *crtc_state);
 int intel_dsc_compute_params(struct intel_crtc_state *pipe_config);
-void intel_uncompressed_joiner_get_config(struct intel_crtc_state *crtc_state);
 void intel_dsc_get_config(struct intel_crtc_state *crtc_state);
 enum intel_display_power_domain
 intel_dsc_power_domain(struct intel_crtc *crtc, enum transcoder cpu_transcoder);
diff --git a/drivers/gpu/drm/i915/display/vlv_dsi.c b/drivers/gpu/drm/i915/display/vlv_dsi.c
index 20141f33ed6444d6b5e2de77223de077f0b7dc8b..0d936f658b3f70ca97f782c7941e42eccdac6210 100644
--- a/drivers/gpu/drm/i915/display/vlv_dsi.c
+++ b/drivers/gpu/drm/i915/display/vlv_dsi.c
@@ -44,6 +44,7 @@
 #include "skl_scaler.h"
 #include "vlv_dsi.h"
 #include "vlv_dsi_pll.h"
+#include "vlv_dsi_regs.h"
 #include "vlv_sideband.h"
 
 /* return pixels in terms of txbyteclkhs */
@@ -1492,7 +1493,7 @@ static void intel_dsi_prepare(struct intel_encoder *intel_encoder,
 		 */
 
 		if (is_vid_mode(intel_dsi) &&
-			intel_dsi->video_mode_format == VIDEO_MODE_BURST) {
+			intel_dsi->video_mode == BURST_MODE) {
 			intel_de_write(dev_priv, MIPI_HS_TX_TIMEOUT(port),
 				       txbyteclkhs(adjusted_mode->crtc_htotal, bpp, intel_dsi->lane_count, intel_dsi->burst_mode_ratio) + 1);
 		} else {
@@ -1568,12 +1569,33 @@ static void intel_dsi_prepare(struct intel_encoder *intel_encoder,
 		intel_de_write(dev_priv, MIPI_CLK_LANE_SWITCH_TIME_CNT(port),
 			       intel_dsi->clk_lp_to_hs_count << LP_HS_SSW_CNT_SHIFT | intel_dsi->clk_hs_to_lp_count << HS_LP_PWR_SW_CNT_SHIFT);
 
-		if (is_vid_mode(intel_dsi))
-			/* Some panels might have resolution which is not a
+		if (is_vid_mode(intel_dsi)) {
+			u32 fmt = intel_dsi->video_frmt_cfg_bits | IP_TG_CONFIG;
+
+			/*
+			 * Some panels might have a resolution that is not a
 			 * multiple of 64 like 1366 x 768. Enable RANDOM
-			 * resolution support for such panels by default */
-			intel_de_write(dev_priv, MIPI_VIDEO_MODE_FORMAT(port),
-				       intel_dsi->video_frmt_cfg_bits | intel_dsi->video_mode_format | IP_TG_CONFIG | RANDOM_DPI_DISPLAY_RESOLUTION);
+			 * resolution support for such panels by default.
+			 */
+			fmt |= RANDOM_DPI_DISPLAY_RESOLUTION;
+
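+			/*
+			 * Translate the VBT-derived video_mode enum into the
+			 * MIPI_VIDEO_MODE_FORMAT register encoding.
+			 */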
+			switch (intel_dsi->video_mode) {
+			default:
+				MISSING_CASE(intel_dsi->video_mode);
+				fallthrough;
+			case NON_BURST_SYNC_EVENTS:
+				fmt |= VIDEO_MODE_NON_BURST_WITH_SYNC_EVENTS;
+				break;
+			case NON_BURST_SYNC_PULSE:
+				fmt |= VIDEO_MODE_NON_BURST_WITH_SYNC_PULSE;
+				break;
+			case BURST_MODE:
+				fmt |= VIDEO_MODE_BURST;
+				break;
+			}
+
+			intel_de_write(dev_priv, MIPI_VIDEO_MODE_FORMAT(port), fmt);
+		}
 	}
 }
 
diff --git a/drivers/gpu/drm/i915/display/vlv_dsi_pll.c b/drivers/gpu/drm/i915/display/vlv_dsi_pll.c
index 1b81797dd02ec6f530480cd37353db358456e5bd..df880f44700a8b2704b475699f5c6bcca81d430a 100644
--- a/drivers/gpu/drm/i915/display/vlv_dsi_pll.c
+++ b/drivers/gpu/drm/i915/display/vlv_dsi_pll.c
@@ -32,6 +32,7 @@
 #include "intel_display_types.h"
 #include "intel_dsi.h"
 #include "vlv_dsi_pll.h"
+#include "vlv_dsi_pll_regs.h"
 #include "vlv_sideband.h"
 
 static const u16 lfsr_converts[] = {
diff --git a/drivers/gpu/drm/i915/display/vlv_dsi_pll_regs.h b/drivers/gpu/drm/i915/display/vlv_dsi_pll_regs.h
new file mode 100644
index 0000000000000000000000000000000000000000..45590e14e54ba6821cbc75578cc27689276c415a
--- /dev/null
+++ b/drivers/gpu/drm/i915/display/vlv_dsi_pll_regs.h
@@ -0,0 +1,109 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef __VLV_DSI_PLL_REGS_H__
+#define __VLV_DSI_PLL_REGS_H__
+
+#include "vlv_dsi_regs.h"
+
+#define MIPIO_TXESC_CLK_DIV1			_MMIO(0x160004)
+#define  GLK_TX_ESC_CLK_DIV1_MASK			0x3FF
+#define MIPIO_TXESC_CLK_DIV2			_MMIO(0x160008)
+#define  GLK_TX_ESC_CLK_DIV2_MASK			0x3FF
+
+#define BXT_MAX_VAR_OUTPUT_KHZ			39500
+
+#define BXT_MIPI_CLOCK_CTL			_MMIO(0x46090)
+#define  BXT_MIPI1_DIV_SHIFT			26
+#define  BXT_MIPI2_DIV_SHIFT			10
+#define  BXT_MIPI_DIV_SHIFT(port)		\
+			_MIPI_PORT(port, BXT_MIPI1_DIV_SHIFT, \
+					BXT_MIPI2_DIV_SHIFT)
+
+/* TX control divider to select actual TX clock output from (8x/var) */
+#define  BXT_MIPI1_TX_ESCLK_SHIFT		26
+#define  BXT_MIPI2_TX_ESCLK_SHIFT		10
+#define  BXT_MIPI_TX_ESCLK_SHIFT(port)		\
+			_MIPI_PORT(port, BXT_MIPI1_TX_ESCLK_SHIFT, \
+					BXT_MIPI2_TX_ESCLK_SHIFT)
+#define  BXT_MIPI1_TX_ESCLK_FIXDIV_MASK		(0x3F << 26)
+#define  BXT_MIPI2_TX_ESCLK_FIXDIV_MASK		(0x3F << 10)
+#define  BXT_MIPI_TX_ESCLK_FIXDIV_MASK(port)	\
+			_MIPI_PORT(port, BXT_MIPI1_TX_ESCLK_FIXDIV_MASK, \
+					BXT_MIPI2_TX_ESCLK_FIXDIV_MASK)
+#define  BXT_MIPI_TX_ESCLK_DIVIDER(port, val)	\
+		(((val) & 0x3F) << BXT_MIPI_TX_ESCLK_SHIFT(port))
+/* RX upper control divider to select actual RX clock output from 8x */
+#define  BXT_MIPI1_RX_ESCLK_UPPER_SHIFT		21
+#define  BXT_MIPI2_RX_ESCLK_UPPER_SHIFT		5
+#define  BXT_MIPI_RX_ESCLK_UPPER_SHIFT(port)		\
+			_MIPI_PORT(port, BXT_MIPI1_RX_ESCLK_UPPER_SHIFT, \
+					BXT_MIPI2_RX_ESCLK_UPPER_SHIFT)
+#define  BXT_MIPI1_RX_ESCLK_UPPER_FIXDIV_MASK		(3 << 21)
+#define  BXT_MIPI2_RX_ESCLK_UPPER_FIXDIV_MASK		(3 << 5)
+#define  BXT_MIPI_RX_ESCLK_UPPER_FIXDIV_MASK(port)	\
+			_MIPI_PORT(port, BXT_MIPI1_RX_ESCLK_UPPER_FIXDIV_MASK, \
+					BXT_MIPI2_RX_ESCLK_UPPER_FIXDIV_MASK)
+#define  BXT_MIPI_RX_ESCLK_UPPER_DIVIDER(port, val)	\
+		(((val) & 3) << BXT_MIPI_RX_ESCLK_UPPER_SHIFT(port))
+/* 8/3X divider to select the actual 8/3X clock output from 8x */
+#define  BXT_MIPI1_8X_BY3_SHIFT                19
+#define  BXT_MIPI2_8X_BY3_SHIFT                3
+#define  BXT_MIPI_8X_BY3_SHIFT(port)          \
+			_MIPI_PORT(port, BXT_MIPI1_8X_BY3_SHIFT, \
+					BXT_MIPI2_8X_BY3_SHIFT)
+#define  BXT_MIPI1_8X_BY3_DIVIDER_MASK         (3 << 19)
+#define  BXT_MIPI2_8X_BY3_DIVIDER_MASK         (3 << 3)
+#define  BXT_MIPI_8X_BY3_DIVIDER_MASK(port)    \
+			_MIPI_PORT(port, BXT_MIPI1_8X_BY3_DIVIDER_MASK, \
+						BXT_MIPI2_8X_BY3_DIVIDER_MASK)
+#define  BXT_MIPI_8X_BY3_DIVIDER(port, val)    \
+			(((val) & 3) << BXT_MIPI_8X_BY3_SHIFT(port))
+/* RX lower control divider to select actual RX clock output from 8x */
+#define  BXT_MIPI1_RX_ESCLK_LOWER_SHIFT		16
+#define  BXT_MIPI2_RX_ESCLK_LOWER_SHIFT		0
+#define  BXT_MIPI_RX_ESCLK_LOWER_SHIFT(port)		\
+			_MIPI_PORT(port, BXT_MIPI1_RX_ESCLK_LOWER_SHIFT, \
+					BXT_MIPI2_RX_ESCLK_LOWER_SHIFT)
+#define  BXT_MIPI1_RX_ESCLK_LOWER_FIXDIV_MASK		(3 << 16)
+#define  BXT_MIPI2_RX_ESCLK_LOWER_FIXDIV_MASK		(3 << 0)
+#define  BXT_MIPI_RX_ESCLK_LOWER_FIXDIV_MASK(port)	\
+			_MIPI_PORT(port, BXT_MIPI1_RX_ESCLK_LOWER_FIXDIV_MASK, \
+					BXT_MIPI2_RX_ESCLK_LOWER_FIXDIV_MASK)
+#define  BXT_MIPI_RX_ESCLK_LOWER_DIVIDER(port, val)	\
+		(((val) & 3) << BXT_MIPI_RX_ESCLK_LOWER_SHIFT(port))
+
+#define RX_DIVIDER_BIT_1_2                     0x3
+#define RX_DIVIDER_BIT_3_4                     0xC
+
+#define BXT_DSI_PLL_CTL			_MMIO(0x161000)
+#define  BXT_DSI_PLL_PVD_RATIO_SHIFT	16
+#define  BXT_DSI_PLL_PVD_RATIO_MASK	(3 << BXT_DSI_PLL_PVD_RATIO_SHIFT)
+#define  BXT_DSI_PLL_PVD_RATIO_1	(1 << BXT_DSI_PLL_PVD_RATIO_SHIFT)
+#define  BXT_DSIC_16X_BY1		(0 << 10)
+#define  BXT_DSIC_16X_BY2		(1 << 10)
+#define  BXT_DSIC_16X_BY3		(2 << 10)
+#define  BXT_DSIC_16X_BY4		(3 << 10)
+#define  BXT_DSIC_16X_MASK		(3 << 10)
+#define  BXT_DSIA_16X_BY1		(0 << 8)
+#define  BXT_DSIA_16X_BY2		(1 << 8)
+#define  BXT_DSIA_16X_BY3		(2 << 8)
+#define  BXT_DSIA_16X_BY4		(3 << 8)
+#define  BXT_DSIA_16X_MASK		(3 << 8)
+#define  BXT_DSI_FREQ_SEL_SHIFT		8
+#define  BXT_DSI_FREQ_SEL_MASK		(0xF << BXT_DSI_FREQ_SEL_SHIFT)
+
+#define BXT_DSI_PLL_RATIO_MAX		0x7D
+#define BXT_DSI_PLL_RATIO_MIN		0x22
+#define GLK_DSI_PLL_RATIO_MAX		0x6F
+#define GLK_DSI_PLL_RATIO_MIN		0x22
+#define BXT_DSI_PLL_RATIO_MASK		0xFF
+#define BXT_REF_CLOCK_KHZ		19200
+
+#define BXT_DSI_PLL_ENABLE		_MMIO(0x46080)
+#define  BXT_DSI_PLL_DO_ENABLE		(1 << 31)
+#define  BXT_DSI_PLL_LOCKED		(1 << 30)
+
+#endif /* __VLV_DSI_PLL_REGS_H__ */
diff --git a/drivers/gpu/drm/i915/display/vlv_dsi_regs.h b/drivers/gpu/drm/i915/display/vlv_dsi_regs.h
new file mode 100644
index 0000000000000000000000000000000000000000..356e515153463307550d767b875251032b3f49d3
--- /dev/null
+++ b/drivers/gpu/drm/i915/display/vlv_dsi_regs.h
@@ -0,0 +1,480 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef __VLV_DSI_REGS_H__
+#define __VLV_DSI_REGS_H__
+
+#include "i915_reg_defs.h"
+
+#define VLV_MIPI_BASE			VLV_DISPLAY_BASE
+#define BXT_MIPI_BASE			0x60000
+
+#define _MIPI_PORT(port, a, c)	(((port) == PORT_A) ? a : c)	/* ports A and C only */
+#define _MMIO_MIPI(port, a, c)	_MMIO(_MIPI_PORT(port, a, c))
+
+/* BXT MIPI mode configure */
+#define  _BXT_MIPIA_TRANS_HACTIVE			0x6B0F8
+#define  _BXT_MIPIC_TRANS_HACTIVE			0x6B8F8
+#define  BXT_MIPI_TRANS_HACTIVE(tc)	_MMIO_MIPI(tc, \
+		_BXT_MIPIA_TRANS_HACTIVE, _BXT_MIPIC_TRANS_HACTIVE)
+
+#define  _BXT_MIPIA_TRANS_VACTIVE			0x6B0FC
+#define  _BXT_MIPIC_TRANS_VACTIVE			0x6B8FC
+#define  BXT_MIPI_TRANS_VACTIVE(tc)	_MMIO_MIPI(tc, \
+		_BXT_MIPIA_TRANS_VACTIVE, _BXT_MIPIC_TRANS_VACTIVE)
+
+#define  _BXT_MIPIA_TRANS_VTOTAL			0x6B100
+#define  _BXT_MIPIC_TRANS_VTOTAL			0x6B900
+#define  BXT_MIPI_TRANS_VTOTAL(tc)	_MMIO_MIPI(tc, \
+		_BXT_MIPIA_TRANS_VTOTAL, _BXT_MIPIC_TRANS_VTOTAL)
+
+#define BXT_P_DSI_REGULATOR_CFG			_MMIO(0x160020)
+#define  STAP_SELECT					(1 << 0)
+
+#define BXT_P_DSI_REGULATOR_TX_CTRL		_MMIO(0x160054)
+#define  HS_IO_CTRL_SELECT				(1 << 0)
+
+#define _MIPIA_PORT_CTRL			(VLV_DISPLAY_BASE + 0x61190)
+#define _MIPIC_PORT_CTRL			(VLV_DISPLAY_BASE + 0x61700)
+#define MIPI_PORT_CTRL(port)	_MMIO_MIPI(port, _MIPIA_PORT_CTRL, _MIPIC_PORT_CTRL)
+
+/* BXT port control */
+#define _BXT_MIPIA_PORT_CTRL				0x6B0C0
+#define _BXT_MIPIC_PORT_CTRL				0x6B8C0
+#define BXT_MIPI_PORT_CTRL(tc)	_MMIO_MIPI(tc, _BXT_MIPIA_PORT_CTRL, _BXT_MIPIC_PORT_CTRL)
+
+#define  DPI_ENABLE					(1 << 31) /* A + C */
+#define  MIPIA_MIPI4DPHY_DELAY_COUNT_SHIFT		27
+#define  MIPIA_MIPI4DPHY_DELAY_COUNT_MASK		(0xf << 27)
+#define  DUAL_LINK_MODE_SHIFT				26
+#define  DUAL_LINK_MODE_MASK				(1 << 26)
+#define  DUAL_LINK_MODE_FRONT_BACK			(0 << 26)
+#define  DUAL_LINK_MODE_PIXEL_ALTERNATIVE		(1 << 26)
+#define  DITHERING_ENABLE				(1 << 25) /* A + C */
+#define  FLOPPED_HSTX					(1 << 23)
+#define  DE_INVERT					(1 << 19) /* XXX */
+#define  MIPIA_FLISDSI_DELAY_COUNT_SHIFT		18
+#define  MIPIA_FLISDSI_DELAY_COUNT_MASK			(0xf << 18)
+#define  AFE_LATCHOUT					(1 << 17)
+#define  LP_OUTPUT_HOLD					(1 << 16)
+#define  MIPIC_FLISDSI_DELAY_COUNT_HIGH_SHIFT		15
+#define  MIPIC_FLISDSI_DELAY_COUNT_HIGH_MASK		(1 << 15)
+#define  MIPIC_MIPI4DPHY_DELAY_COUNT_SHIFT		11
+#define  MIPIC_MIPI4DPHY_DELAY_COUNT_MASK		(0xf << 11)
+#define  CSB_SHIFT					9
+#define  CSB_MASK					(3 << 9)
+#define  CSB_20MHZ					(0 << 9)
+#define  CSB_10MHZ					(1 << 9)
+#define  CSB_40MHZ					(2 << 9)
+#define  BANDGAP_MASK					(1 << 8)
+#define  BANDGAP_PNW_CIRCUIT				(0 << 8)
+#define  BANDGAP_LNC_CIRCUIT				(1 << 8)
+#define  MIPIC_FLISDSI_DELAY_COUNT_LOW_SHIFT		5
+#define  MIPIC_FLISDSI_DELAY_COUNT_LOW_MASK		(7 << 5)
+#define  TEARING_EFFECT_DELAY				(1 << 4) /* A + C */
+#define  TEARING_EFFECT_SHIFT				2 /* A + C */
+#define  TEARING_EFFECT_MASK				(3 << 2)
+#define  TEARING_EFFECT_OFF				(0 << 2)
+#define  TEARING_EFFECT_DSI				(1 << 2)
+#define  TEARING_EFFECT_GPIO				(2 << 2)
+#define  LANE_CONFIGURATION_SHIFT			0
+#define  LANE_CONFIGURATION_MASK			(3 << 0)
+#define  LANE_CONFIGURATION_4LANE			(0 << 0)
+#define  LANE_CONFIGURATION_DUAL_LINK_A			(1 << 0)
+#define  LANE_CONFIGURATION_DUAL_LINK_B			(2 << 0)
+
+#define _MIPIA_TEARING_CTRL			(VLV_DISPLAY_BASE + 0x61194)
+#define _MIPIC_TEARING_CTRL			(VLV_DISPLAY_BASE + 0x61704)
+#define MIPI_TEARING_CTRL(port)			_MMIO_MIPI(port, _MIPIA_TEARING_CTRL, _MIPIC_TEARING_CTRL)
+#define  TEARING_EFFECT_DELAY_SHIFT			0
+#define  TEARING_EFFECT_DELAY_MASK			(0xffff << 0)
+
+/* XXX: all bits reserved */
+#define _MIPIA_AUTOPWG			(VLV_DISPLAY_BASE + 0x611a0)
+
+/* MIPI DSI Controller and D-PHY registers */
+
+#define _MIPIA_DEVICE_READY		(dev_priv->mipi_mmio_base + 0xb000)
+#define _MIPIC_DEVICE_READY		(dev_priv->mipi_mmio_base + 0xb800)
+#define MIPI_DEVICE_READY(port)		_MMIO_MIPI(port, _MIPIA_DEVICE_READY, _MIPIC_DEVICE_READY)
+#define  BUS_POSSESSION					(1 << 3) /* set to give bus to receiver */
+#define  ULPS_STATE_MASK				(3 << 1)
+#define  ULPS_STATE_ENTER				(2 << 1)
+#define  ULPS_STATE_EXIT				(1 << 1)
+#define  ULPS_STATE_NORMAL_OPERATION			(0 << 1)
+#define  DEVICE_READY					(1 << 0)
+
+#define _MIPIA_INTR_STAT		(dev_priv->mipi_mmio_base + 0xb004)
+#define _MIPIC_INTR_STAT		(dev_priv->mipi_mmio_base + 0xb804)
+#define MIPI_INTR_STAT(port)		_MMIO_MIPI(port, _MIPIA_INTR_STAT, _MIPIC_INTR_STAT)
+#define _MIPIA_INTR_EN			(dev_priv->mipi_mmio_base + 0xb008)
+#define _MIPIC_INTR_EN			(dev_priv->mipi_mmio_base + 0xb808)
+#define MIPI_INTR_EN(port)		_MMIO_MIPI(port, _MIPIA_INTR_EN, _MIPIC_INTR_EN)
+#define  TEARING_EFFECT					(1 << 31)
+#define  SPL_PKT_SENT_INTERRUPT				(1 << 30)
+#define  GEN_READ_DATA_AVAIL				(1 << 29)
+#define  LP_GENERIC_WR_FIFO_FULL			(1 << 28)
+#define  HS_GENERIC_WR_FIFO_FULL			(1 << 27)
+#define  RX_PROT_VIOLATION				(1 << 26)
+#define  RX_INVALID_TX_LENGTH				(1 << 25)
+#define  ACK_WITH_NO_ERROR				(1 << 24)
+#define  TURN_AROUND_ACK_TIMEOUT			(1 << 23)
+#define  LP_RX_TIMEOUT					(1 << 22)
+#define  HS_TX_TIMEOUT					(1 << 21)
+#define  DPI_FIFO_UNDERRUN				(1 << 20)
+#define  LOW_CONTENTION					(1 << 19)
+#define  HIGH_CONTENTION				(1 << 18)
+#define  TXDSI_VC_ID_INVALID				(1 << 17)
+#define  TXDSI_DATA_TYPE_NOT_RECOGNISED			(1 << 16)
+#define  TXCHECKSUM_ERROR				(1 << 15)
+#define  TXECC_MULTIBIT_ERROR				(1 << 14)
+#define  TXECC_SINGLE_BIT_ERROR				(1 << 13)
+#define  TXFALSE_CONTROL_ERROR				(1 << 12)
+#define  RXDSI_VC_ID_INVALID				(1 << 11)
+#define  RXDSI_DATA_TYPE_NOT_REGOGNISED			(1 << 10)
+#define  RXCHECKSUM_ERROR				(1 << 9)
+#define  RXECC_MULTIBIT_ERROR				(1 << 8)
+#define  RXECC_SINGLE_BIT_ERROR				(1 << 7)
+#define  RXFALSE_CONTROL_ERROR				(1 << 6)
+#define  RXHS_RECEIVE_TIMEOUT_ERROR			(1 << 5)
+#define  RX_LP_TX_SYNC_ERROR				(1 << 4)
+#define  RXEXCAPE_MODE_ENTRY_ERROR			(1 << 3)
+#define  RXEOT_SYNC_ERROR				(1 << 2)
+#define  RXSOT_SYNC_ERROR				(1 << 1)
+#define  RXSOT_ERROR					(1 << 0)
+
+#define _MIPIA_DSI_FUNC_PRG		(dev_priv->mipi_mmio_base + 0xb00c)
+#define _MIPIC_DSI_FUNC_PRG		(dev_priv->mipi_mmio_base + 0xb80c)
+#define MIPI_DSI_FUNC_PRG(port)		_MMIO_MIPI(port, _MIPIA_DSI_FUNC_PRG, _MIPIC_DSI_FUNC_PRG)
+#define  CMD_MODE_DATA_WIDTH_MASK			(7 << 13)
+#define  CMD_MODE_NOT_SUPPORTED				(0 << 13)
+#define  CMD_MODE_DATA_WIDTH_16_BIT			(1 << 13)
+#define  CMD_MODE_DATA_WIDTH_9_BIT			(2 << 13)
+#define  CMD_MODE_DATA_WIDTH_8_BIT			(3 << 13)
+#define  CMD_MODE_DATA_WIDTH_OPTION1			(4 << 13)
+#define  CMD_MODE_DATA_WIDTH_OPTION2			(5 << 13)
+#define  VID_MODE_FORMAT_MASK				(0xf << 7)
+#define  VID_MODE_NOT_SUPPORTED				(0 << 7)
+#define  VID_MODE_FORMAT_RGB565				(1 << 7)
+#define  VID_MODE_FORMAT_RGB666_PACKED			(2 << 7)
+#define  VID_MODE_FORMAT_RGB666				(3 << 7)
+#define  VID_MODE_FORMAT_RGB888				(4 << 7)
+#define  CMD_MODE_CHANNEL_NUMBER_SHIFT			5
+#define  CMD_MODE_CHANNEL_NUMBER_MASK			(3 << 5)
+#define  VID_MODE_CHANNEL_NUMBER_SHIFT			3
+#define  VID_MODE_CHANNEL_NUMBER_MASK			(3 << 3)
+#define  DATA_LANES_PRG_REG_SHIFT			0
+#define  DATA_LANES_PRG_REG_MASK			(7 << 0)
+
+#define _MIPIA_HS_TX_TIMEOUT		(dev_priv->mipi_mmio_base + 0xb010)
+#define _MIPIC_HS_TX_TIMEOUT		(dev_priv->mipi_mmio_base + 0xb810)
+#define MIPI_HS_TX_TIMEOUT(port)	_MMIO_MIPI(port, _MIPIA_HS_TX_TIMEOUT, _MIPIC_HS_TX_TIMEOUT)
+#define  HIGH_SPEED_TX_TIMEOUT_COUNTER_MASK		0xffffff
+
+#define _MIPIA_LP_RX_TIMEOUT		(dev_priv->mipi_mmio_base + 0xb014)
+#define _MIPIC_LP_RX_TIMEOUT		(dev_priv->mipi_mmio_base + 0xb814)
+#define MIPI_LP_RX_TIMEOUT(port)	_MMIO_MIPI(port, _MIPIA_LP_RX_TIMEOUT, _MIPIC_LP_RX_TIMEOUT)
+#define  LOW_POWER_RX_TIMEOUT_COUNTER_MASK		0xffffff
+
+#define _MIPIA_TURN_AROUND_TIMEOUT	(dev_priv->mipi_mmio_base + 0xb018)
+#define _MIPIC_TURN_AROUND_TIMEOUT	(dev_priv->mipi_mmio_base + 0xb818)
+#define MIPI_TURN_AROUND_TIMEOUT(port)	_MMIO_MIPI(port, _MIPIA_TURN_AROUND_TIMEOUT, _MIPIC_TURN_AROUND_TIMEOUT)
+#define  TURN_AROUND_TIMEOUT_MASK			0x3f
+
+#define _MIPIA_DEVICE_RESET_TIMER	(dev_priv->mipi_mmio_base + 0xb01c)
+#define _MIPIC_DEVICE_RESET_TIMER	(dev_priv->mipi_mmio_base + 0xb81c)
+#define MIPI_DEVICE_RESET_TIMER(port)	_MMIO_MIPI(port, _MIPIA_DEVICE_RESET_TIMER, _MIPIC_DEVICE_RESET_TIMER)
+#define  DEVICE_RESET_TIMER_MASK			0xffff
+
+#define _MIPIA_DPI_RESOLUTION		(dev_priv->mipi_mmio_base + 0xb020)
+#define _MIPIC_DPI_RESOLUTION		(dev_priv->mipi_mmio_base + 0xb820)
+#define MIPI_DPI_RESOLUTION(port)	_MMIO_MIPI(port, _MIPIA_DPI_RESOLUTION, _MIPIC_DPI_RESOLUTION)
+#define  VERTICAL_ADDRESS_SHIFT				16
+#define  VERTICAL_ADDRESS_MASK				(0xffff << 16)
+#define  HORIZONTAL_ADDRESS_SHIFT			0
+#define  HORIZONTAL_ADDRESS_MASK			0xffff
+
+#define _MIPIA_DBI_FIFO_THROTTLE	(dev_priv->mipi_mmio_base + 0xb024)
+#define _MIPIC_DBI_FIFO_THROTTLE	(dev_priv->mipi_mmio_base + 0xb824)
+#define MIPI_DBI_FIFO_THROTTLE(port)	_MMIO_MIPI(port, _MIPIA_DBI_FIFO_THROTTLE, _MIPIC_DBI_FIFO_THROTTLE)
+#define  DBI_FIFO_EMPTY_HALF				(0 << 0)
+#define  DBI_FIFO_EMPTY_QUARTER				(1 << 0)
+#define  DBI_FIFO_EMPTY_7_LOCATIONS			(2 << 0)
+
+/* regs below are bits 15:0 */
+#define _MIPIA_HSYNC_PADDING_COUNT	(dev_priv->mipi_mmio_base + 0xb028)
+#define _MIPIC_HSYNC_PADDING_COUNT	(dev_priv->mipi_mmio_base + 0xb828)
+#define MIPI_HSYNC_PADDING_COUNT(port)	_MMIO_MIPI(port, _MIPIA_HSYNC_PADDING_COUNT, _MIPIC_HSYNC_PADDING_COUNT)
+
+#define _MIPIA_HBP_COUNT		(dev_priv->mipi_mmio_base + 0xb02c)
+#define _MIPIC_HBP_COUNT		(dev_priv->mipi_mmio_base + 0xb82c)
+#define MIPI_HBP_COUNT(port)		_MMIO_MIPI(port, _MIPIA_HBP_COUNT, _MIPIC_HBP_COUNT)
+
+#define _MIPIA_HFP_COUNT		(dev_priv->mipi_mmio_base + 0xb030)
+#define _MIPIC_HFP_COUNT		(dev_priv->mipi_mmio_base + 0xb830)
+#define MIPI_HFP_COUNT(port)		_MMIO_MIPI(port, _MIPIA_HFP_COUNT, _MIPIC_HFP_COUNT)
+
+#define _MIPIA_HACTIVE_AREA_COUNT	(dev_priv->mipi_mmio_base + 0xb034)
+#define _MIPIC_HACTIVE_AREA_COUNT	(dev_priv->mipi_mmio_base + 0xb834)
+#define MIPI_HACTIVE_AREA_COUNT(port)	_MMIO_MIPI(port, _MIPIA_HACTIVE_AREA_COUNT, _MIPIC_HACTIVE_AREA_COUNT)
+
+#define _MIPIA_VSYNC_PADDING_COUNT	(dev_priv->mipi_mmio_base + 0xb038)
+#define _MIPIC_VSYNC_PADDING_COUNT	(dev_priv->mipi_mmio_base + 0xb838)
+#define MIPI_VSYNC_PADDING_COUNT(port)	_MMIO_MIPI(port, _MIPIA_VSYNC_PADDING_COUNT, _MIPIC_VSYNC_PADDING_COUNT)
+
+#define _MIPIA_VBP_COUNT		(dev_priv->mipi_mmio_base + 0xb03c)
+#define _MIPIC_VBP_COUNT		(dev_priv->mipi_mmio_base + 0xb83c)
+#define MIPI_VBP_COUNT(port)		_MMIO_MIPI(port, _MIPIA_VBP_COUNT, _MIPIC_VBP_COUNT)
+
+#define _MIPIA_VFP_COUNT		(dev_priv->mipi_mmio_base + 0xb040)
+#define _MIPIC_VFP_COUNT		(dev_priv->mipi_mmio_base + 0xb840)
+#define MIPI_VFP_COUNT(port)		_MMIO_MIPI(port, _MIPIA_VFP_COUNT, _MIPIC_VFP_COUNT)
+
+#define _MIPIA_HIGH_LOW_SWITCH_COUNT	(dev_priv->mipi_mmio_base + 0xb044)
+#define _MIPIC_HIGH_LOW_SWITCH_COUNT	(dev_priv->mipi_mmio_base + 0xb844)
+#define MIPI_HIGH_LOW_SWITCH_COUNT(port)	_MMIO_MIPI(port,	_MIPIA_HIGH_LOW_SWITCH_COUNT, _MIPIC_HIGH_LOW_SWITCH_COUNT)
+
+#define _MIPIA_DPI_CONTROL		(dev_priv->mipi_mmio_base + 0xb048)
+#define _MIPIC_DPI_CONTROL		(dev_priv->mipi_mmio_base + 0xb848)
+#define MIPI_DPI_CONTROL(port)		_MMIO_MIPI(port, _MIPIA_DPI_CONTROL, _MIPIC_DPI_CONTROL)
+#define  DPI_LP_MODE					(1 << 6)
+#define  BACKLIGHT_OFF					(1 << 5)
+#define  BACKLIGHT_ON					(1 << 4)
+#define  COLOR_MODE_OFF					(1 << 3)
+#define  COLOR_MODE_ON					(1 << 2)
+#define  TURN_ON					(1 << 1)
+#define  SHUTDOWN					(1 << 0)
+
+#define _MIPIA_DPI_DATA			(dev_priv->mipi_mmio_base + 0xb04c)
+#define _MIPIC_DPI_DATA			(dev_priv->mipi_mmio_base + 0xb84c)
+#define MIPI_DPI_DATA(port)		_MMIO_MIPI(port, _MIPIA_DPI_DATA, _MIPIC_DPI_DATA)
+#define  COMMAND_BYTE_SHIFT				0
+#define  COMMAND_BYTE_MASK				(0x3f << 0)
+
+#define _MIPIA_INIT_COUNT		(dev_priv->mipi_mmio_base + 0xb050)
+#define _MIPIC_INIT_COUNT		(dev_priv->mipi_mmio_base + 0xb850)
+#define MIPI_INIT_COUNT(port)		_MMIO_MIPI(port, _MIPIA_INIT_COUNT, _MIPIC_INIT_COUNT)
+#define  MASTER_INIT_TIMER_SHIFT			0
+#define  MASTER_INIT_TIMER_MASK				(0xffff << 0)
+
+#define _MIPIA_MAX_RETURN_PKT_SIZE	(dev_priv->mipi_mmio_base + 0xb054)
+#define _MIPIC_MAX_RETURN_PKT_SIZE	(dev_priv->mipi_mmio_base + 0xb854)
+#define MIPI_MAX_RETURN_PKT_SIZE(port)	_MMIO_MIPI(port, \
+			_MIPIA_MAX_RETURN_PKT_SIZE, _MIPIC_MAX_RETURN_PKT_SIZE)
+#define  MAX_RETURN_PKT_SIZE_SHIFT			0
+#define  MAX_RETURN_PKT_SIZE_MASK			(0x3ff << 0)
+
+#define _MIPIA_VIDEO_MODE_FORMAT	(dev_priv->mipi_mmio_base + 0xb058)
+#define _MIPIC_VIDEO_MODE_FORMAT	(dev_priv->mipi_mmio_base + 0xb858)
+#define MIPI_VIDEO_MODE_FORMAT(port)	_MMIO_MIPI(port, _MIPIA_VIDEO_MODE_FORMAT, _MIPIC_VIDEO_MODE_FORMAT)
+#define  RANDOM_DPI_DISPLAY_RESOLUTION			(1 << 4)
+#define  DISABLE_VIDEO_BTA				(1 << 3)
+#define  IP_TG_CONFIG					(1 << 2)
+#define  VIDEO_MODE_NON_BURST_WITH_SYNC_PULSE		(1 << 0)
+#define  VIDEO_MODE_NON_BURST_WITH_SYNC_EVENTS		(2 << 0)
+#define  VIDEO_MODE_BURST				(3 << 0)
+
+#define _MIPIA_EOT_DISABLE		(dev_priv->mipi_mmio_base + 0xb05c)
+#define _MIPIC_EOT_DISABLE		(dev_priv->mipi_mmio_base + 0xb85c)
+#define MIPI_EOT_DISABLE(port)		_MMIO_MIPI(port, _MIPIA_EOT_DISABLE, _MIPIC_EOT_DISABLE)
+#define  BXT_DEFEATURE_DPI_FIFO_CTR			(1 << 9)
+#define  BXT_DPHY_DEFEATURE_EN				(1 << 8)
+#define  LP_RX_TIMEOUT_ERROR_RECOVERY_DISABLE		(1 << 7)
+#define  HS_RX_TIMEOUT_ERROR_RECOVERY_DISABLE		(1 << 6)
+#define  LOW_CONTENTION_RECOVERY_DISABLE		(1 << 5)
+#define  HIGH_CONTENTION_RECOVERY_DISABLE		(1 << 4)
+#define  TXDSI_TYPE_NOT_RECOGNISED_ERROR_RECOVERY_DISABLE (1 << 3)
+#define  TXECC_MULTIBIT_ERROR_RECOVERY_DISABLE		(1 << 2)
+#define  CLOCKSTOP					(1 << 1)
+#define  EOT_DISABLE					(1 << 0)
+
+#define _MIPIA_LP_BYTECLK		(dev_priv->mipi_mmio_base + 0xb060)
+#define _MIPIC_LP_BYTECLK		(dev_priv->mipi_mmio_base + 0xb860)
+#define MIPI_LP_BYTECLK(port)		_MMIO_MIPI(port, _MIPIA_LP_BYTECLK, _MIPIC_LP_BYTECLK)
+#define  LP_BYTECLK_SHIFT				0
+#define  LP_BYTECLK_MASK				(0xffff << 0)
+
+#define _MIPIA_TLPX_TIME_COUNT		(dev_priv->mipi_mmio_base + 0xb0a4)
+#define _MIPIC_TLPX_TIME_COUNT		(dev_priv->mipi_mmio_base + 0xb8a4)
+#define MIPI_TLPX_TIME_COUNT(port)	 _MMIO_MIPI(port, _MIPIA_TLPX_TIME_COUNT, _MIPIC_TLPX_TIME_COUNT)
+
+#define _MIPIA_CLK_LANE_TIMING		(dev_priv->mipi_mmio_base + 0xb098)
+#define _MIPIC_CLK_LANE_TIMING		(dev_priv->mipi_mmio_base + 0xb898)
+#define MIPI_CLK_LANE_TIMING(port)	 _MMIO_MIPI(port, _MIPIA_CLK_LANE_TIMING, _MIPIC_CLK_LANE_TIMING)
+
+/* bits 31:0 */
+#define _MIPIA_LP_GEN_DATA		(dev_priv->mipi_mmio_base + 0xb064)
+#define _MIPIC_LP_GEN_DATA		(dev_priv->mipi_mmio_base + 0xb864)
+#define MIPI_LP_GEN_DATA(port)		_MMIO_MIPI(port, _MIPIA_LP_GEN_DATA, _MIPIC_LP_GEN_DATA)
+
+/* bits 31:0 */
+#define _MIPIA_HS_GEN_DATA		(dev_priv->mipi_mmio_base + 0xb068)
+#define _MIPIC_HS_GEN_DATA		(dev_priv->mipi_mmio_base + 0xb868)
+#define MIPI_HS_GEN_DATA(port)		_MMIO_MIPI(port, _MIPIA_HS_GEN_DATA, _MIPIC_HS_GEN_DATA)
+
+#define _MIPIA_LP_GEN_CTRL		(dev_priv->mipi_mmio_base + 0xb06c)
+#define _MIPIC_LP_GEN_CTRL		(dev_priv->mipi_mmio_base + 0xb86c)
+#define MIPI_LP_GEN_CTRL(port)		_MMIO_MIPI(port, _MIPIA_LP_GEN_CTRL, _MIPIC_LP_GEN_CTRL)
+#define _MIPIA_HS_GEN_CTRL		(dev_priv->mipi_mmio_base + 0xb070)
+#define _MIPIC_HS_GEN_CTRL		(dev_priv->mipi_mmio_base + 0xb870)
+#define MIPI_HS_GEN_CTRL(port)		_MMIO_MIPI(port, _MIPIA_HS_GEN_CTRL, _MIPIC_HS_GEN_CTRL)
+#define  LONG_PACKET_WORD_COUNT_SHIFT			8
+#define  LONG_PACKET_WORD_COUNT_MASK			(0xffff << 8)
+#define  SHORT_PACKET_PARAM_SHIFT			8
+#define  SHORT_PACKET_PARAM_MASK			(0xffff << 8)
+#define  VIRTUAL_CHANNEL_SHIFT				6
+#define  VIRTUAL_CHANNEL_MASK				(3 << 6)
+#define  DATA_TYPE_SHIFT				0
+#define  DATA_TYPE_MASK					(0x3f << 0)
+/* data type values, see include/video/mipi_display.h */
+
+#define _MIPIA_GEN_FIFO_STAT		(dev_priv->mipi_mmio_base + 0xb074)
+#define _MIPIC_GEN_FIFO_STAT		(dev_priv->mipi_mmio_base + 0xb874)
+#define MIPI_GEN_FIFO_STAT(port)	_MMIO_MIPI(port, _MIPIA_GEN_FIFO_STAT, _MIPIC_GEN_FIFO_STAT)
+#define  DPI_FIFO_EMPTY					(1 << 28)
+#define  DBI_FIFO_EMPTY					(1 << 27)
+#define  LP_CTRL_FIFO_EMPTY				(1 << 26)
+#define  LP_CTRL_FIFO_HALF_EMPTY			(1 << 25)
+#define  LP_CTRL_FIFO_FULL				(1 << 24)
+#define  HS_CTRL_FIFO_EMPTY				(1 << 18)
+#define  HS_CTRL_FIFO_HALF_EMPTY			(1 << 17)
+#define  HS_CTRL_FIFO_FULL				(1 << 16)
+#define  LP_DATA_FIFO_EMPTY				(1 << 10)
+#define  LP_DATA_FIFO_HALF_EMPTY			(1 << 9)
+#define  LP_DATA_FIFO_FULL				(1 << 8)
+#define  HS_DATA_FIFO_EMPTY				(1 << 2)
+#define  HS_DATA_FIFO_HALF_EMPTY			(1 << 1)
+#define  HS_DATA_FIFO_FULL				(1 << 0)
+
+#define _MIPIA_HS_LS_DBI_ENABLE		(dev_priv->mipi_mmio_base + 0xb078)
+#define _MIPIC_HS_LS_DBI_ENABLE		(dev_priv->mipi_mmio_base + 0xb878)
+#define MIPI_HS_LP_DBI_ENABLE(port)	_MMIO_MIPI(port, _MIPIA_HS_LS_DBI_ENABLE, _MIPIC_HS_LS_DBI_ENABLE)
+#define  DBI_HS_LP_MODE_MASK				(1 << 0)
+#define  DBI_LP_MODE					(1 << 0)
+#define  DBI_HS_MODE					(0 << 0)
+
+#define _MIPIA_DPHY_PARAM		(dev_priv->mipi_mmio_base + 0xb080)
+#define _MIPIC_DPHY_PARAM		(dev_priv->mipi_mmio_base + 0xb880)
+#define MIPI_DPHY_PARAM(port)		_MMIO_MIPI(port, _MIPIA_DPHY_PARAM, _MIPIC_DPHY_PARAM)
+#define  EXIT_ZERO_COUNT_SHIFT				24
+#define  EXIT_ZERO_COUNT_MASK				(0x3f << 24)
+#define  TRAIL_COUNT_SHIFT				16
+#define  TRAIL_COUNT_MASK				(0x1f << 16)
+#define  CLK_ZERO_COUNT_SHIFT				8
+#define  CLK_ZERO_COUNT_MASK				(0xff << 8)
+#define  PREPARE_COUNT_SHIFT				0
+#define  PREPARE_COUNT_MASK				(0x3f << 0)
+
+#define _MIPIA_DBI_BW_CTRL		(dev_priv->mipi_mmio_base + 0xb084)
+#define _MIPIC_DBI_BW_CTRL		(dev_priv->mipi_mmio_base + 0xb884)
+#define MIPI_DBI_BW_CTRL(port)		_MMIO_MIPI(port, _MIPIA_DBI_BW_CTRL, _MIPIC_DBI_BW_CTRL)
+
+#define _MIPIA_CLK_LANE_SWITCH_TIME_CNT		(dev_priv->mipi_mmio_base + 0xb088)
+#define _MIPIC_CLK_LANE_SWITCH_TIME_CNT		(dev_priv->mipi_mmio_base + 0xb888)
+#define MIPI_CLK_LANE_SWITCH_TIME_CNT(port)	_MMIO_MIPI(port, _MIPIA_CLK_LANE_SWITCH_TIME_CNT, _MIPIC_CLK_LANE_SWITCH_TIME_CNT)
+#define  LP_HS_SSW_CNT_SHIFT				16
+#define  LP_HS_SSW_CNT_MASK				(0xffff << 16)
+#define  HS_LP_PWR_SW_CNT_SHIFT				0
+#define  HS_LP_PWR_SW_CNT_MASK				(0xffff << 0)
+
+#define _MIPIA_STOP_STATE_STALL		(dev_priv->mipi_mmio_base + 0xb08c)
+#define _MIPIC_STOP_STATE_STALL		(dev_priv->mipi_mmio_base + 0xb88c)
+#define MIPI_STOP_STATE_STALL(port)	_MMIO_MIPI(port, _MIPIA_STOP_STATE_STALL, _MIPIC_STOP_STATE_STALL)
+#define  STOP_STATE_STALL_COUNTER_SHIFT			0
+#define  STOP_STATE_STALL_COUNTER_MASK			(0xff << 0)
+
+#define _MIPIA_INTR_STAT_REG_1		(dev_priv->mipi_mmio_base + 0xb090)
+#define _MIPIC_INTR_STAT_REG_1		(dev_priv->mipi_mmio_base + 0xb890)
+#define MIPI_INTR_STAT_REG_1(port)	_MMIO_MIPI(port, _MIPIA_INTR_STAT_REG_1, _MIPIC_INTR_STAT_REG_1)
+#define _MIPIA_INTR_EN_REG_1		(dev_priv->mipi_mmio_base + 0xb094)
+#define _MIPIC_INTR_EN_REG_1		(dev_priv->mipi_mmio_base + 0xb894)
+#define MIPI_INTR_EN_REG_1(port)	_MMIO_MIPI(port, _MIPIA_INTR_EN_REG_1, _MIPIC_INTR_EN_REG_1)
+#define  RX_CONTENTION_DETECTED				(1 << 0)
+
+/* XXX: only pipe A ?!? */
+#define MIPIA_DBI_TYPEC_CTRL		(dev_priv->mipi_mmio_base + 0xb100)
+#define  DBI_TYPEC_ENABLE				(1 << 31)
+#define  DBI_TYPEC_WIP					(1 << 30)
+#define  DBI_TYPEC_OPTION_SHIFT				28
+#define  DBI_TYPEC_OPTION_MASK				(3 << 28)
+#define  DBI_TYPEC_FREQ_SHIFT				24
+#define  DBI_TYPEC_FREQ_MASK				(0xf << 24)
+#define  DBI_TYPEC_OVERRIDE				(1 << 8)
+#define  DBI_TYPEC_OVERRIDE_COUNTER_SHIFT		0
+#define  DBI_TYPEC_OVERRIDE_COUNTER_MASK		(0xff << 0)
+
+/* MIPI adapter registers */
+
+#define _MIPIA_CTRL			(dev_priv->mipi_mmio_base + 0xb104)
+#define _MIPIC_CTRL			(dev_priv->mipi_mmio_base + 0xb904)
+#define MIPI_CTRL(port)			_MMIO_MIPI(port, _MIPIA_CTRL, _MIPIC_CTRL)
+#define  ESCAPE_CLOCK_DIVIDER_SHIFT			5 /* A only */
+#define  ESCAPE_CLOCK_DIVIDER_MASK			(3 << 5)
+#define  ESCAPE_CLOCK_DIVIDER_1				(0 << 5)
+#define  ESCAPE_CLOCK_DIVIDER_2				(1 << 5)
+#define  ESCAPE_CLOCK_DIVIDER_4				(2 << 5)
+#define  READ_REQUEST_PRIORITY_SHIFT			3
+#define  READ_REQUEST_PRIORITY_MASK			(3 << 3)
+#define  READ_REQUEST_PRIORITY_LOW			(0 << 3)
+#define  READ_REQUEST_PRIORITY_HIGH			(3 << 3)
+#define  RGB_FLIP_TO_BGR				(1 << 2)
+
+#define  BXT_PIPE_SELECT_SHIFT				7
+#define  BXT_PIPE_SELECT_MASK				(7 << 7)
+#define  BXT_PIPE_SELECT(pipe)				((pipe) << 7)
+#define  GLK_PHY_STATUS_PORT_READY			(1 << 31) /* RO */
+#define  GLK_ULPS_NOT_ACTIVE				(1 << 30) /* RO */
+#define  GLK_MIPIIO_RESET_RELEASED			(1 << 28)
+#define  GLK_CLOCK_LANE_STOP_STATE			(1 << 27) /* RO */
+#define  GLK_DATA_LANE_STOP_STATE			(1 << 26) /* RO */
+#define  GLK_LP_WAKE					(1 << 22)
+#define  GLK_LP11_LOW_PWR_MODE				(1 << 21)
+#define  GLK_LP00_LOW_PWR_MODE				(1 << 20)
+#define  GLK_FIREWALL_ENABLE				(1 << 16)
+#define  BXT_PIXEL_OVERLAP_CNT_MASK			(0xf << 10)
+#define  BXT_PIXEL_OVERLAP_CNT_SHIFT			10
+#define  BXT_DSC_ENABLE					(1 << 3)
+#define  BXT_RGB_FLIP					(1 << 2)
+#define  GLK_MIPIIO_PORT_POWERED			(1 << 1) /* RO */
+#define  GLK_MIPIIO_ENABLE				(1 << 0)
+
+#define _MIPIA_DATA_ADDRESS		(dev_priv->mipi_mmio_base + 0xb108)
+#define _MIPIC_DATA_ADDRESS		(dev_priv->mipi_mmio_base + 0xb908)
+#define MIPI_DATA_ADDRESS(port)		_MMIO_MIPI(port, _MIPIA_DATA_ADDRESS, _MIPIC_DATA_ADDRESS)
+#define  DATA_MEM_ADDRESS_SHIFT				5
+#define  DATA_MEM_ADDRESS_MASK				(0x7ffffff << 5)
+#define  DATA_VALID					(1 << 0)
+
+#define _MIPIA_DATA_LENGTH		(dev_priv->mipi_mmio_base + 0xb10c)
+#define _MIPIC_DATA_LENGTH		(dev_priv->mipi_mmio_base + 0xb90c)
+#define MIPI_DATA_LENGTH(port)		_MMIO_MIPI(port, _MIPIA_DATA_LENGTH, _MIPIC_DATA_LENGTH)
+#define  DATA_LENGTH_SHIFT				0
+#define  DATA_LENGTH_MASK				(0xfffff << 0)
+
+#define _MIPIA_COMMAND_ADDRESS		(dev_priv->mipi_mmio_base + 0xb110)
+#define _MIPIC_COMMAND_ADDRESS		(dev_priv->mipi_mmio_base + 0xb910)
+#define MIPI_COMMAND_ADDRESS(port)	_MMIO_MIPI(port, _MIPIA_COMMAND_ADDRESS, _MIPIC_COMMAND_ADDRESS)
+#define  COMMAND_MEM_ADDRESS_SHIFT			5
+#define  COMMAND_MEM_ADDRESS_MASK			(0x7ffffff << 5)
+#define  AUTO_PWG_ENABLE				(1 << 2)
+#define  MEMORY_WRITE_DATA_FROM_PIPE_RENDERING		(1 << 1)
+#define  COMMAND_VALID					(1 << 0)
+
+#define _MIPIA_COMMAND_LENGTH		(dev_priv->mipi_mmio_base + 0xb114)
+#define _MIPIC_COMMAND_LENGTH		(dev_priv->mipi_mmio_base + 0xb914)
+#define MIPI_COMMAND_LENGTH(port)	_MMIO_MIPI(port, _MIPIA_COMMAND_LENGTH, _MIPIC_COMMAND_LENGTH)
+#define  COMMAND_LENGTH_SHIFT(n)			(8 * (n)) /* n: 0...3 */
+#define  COMMAND_LENGTH_MASK(n)				(0xff << (8 * (n)))
+
+#define _MIPIA_READ_DATA_RETURN0	(dev_priv->mipi_mmio_base + 0xb118)
+#define _MIPIC_READ_DATA_RETURN0	(dev_priv->mipi_mmio_base + 0xb918)
+#define MIPI_READ_DATA_RETURN(port, n) _MMIO(_MIPI(port, _MIPIA_READ_DATA_RETURN0, _MIPIC_READ_DATA_RETURN0) + 4 * (n)) /* n: 0...7 */
+
+#define _MIPIA_READ_DATA_VALID		(dev_priv->mipi_mmio_base + 0xb138)
+#define _MIPIC_READ_DATA_VALID		(dev_priv->mipi_mmio_base + 0xb938)
+#define MIPI_READ_DATA_VALID(port)	_MMIO_MIPI(port, _MIPIA_READ_DATA_VALID, _MIPIC_READ_DATA_VALID)
+#define  READ_DATA_VALID(n)				(1 << (n))
+
+#endif /* __VLV_DSI_REGS_H__ */
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_clflush.c b/drivers/gpu/drm/i915/gem/i915_gem_clflush.c
index 8a248003dfae1f7f95952a1a35f1ee54306ad918..ce91b23385cf9f1707185a29c7bff8185db49cfe 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_clflush.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_clflush.c
@@ -4,6 +4,8 @@
  * Copyright © 2016 Intel Corporation
  */
 
+#include <drm/drm_cache.h>
+
 #include "display/intel_frontbuffer.h"
 
 #include "i915_drv.h"
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index 00327b750fbb9aea3be98690988592fc11d3f3ef..bc6d59df064dc6914d871f25a70743f892807316 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -67,6 +67,7 @@
 #include <linux/log2.h>
 #include <linux/nospec.h>
 
+#include <drm/drm_cache.h>
 #include <drm/drm_syncobj.h>
 
 #include "gt/gen6_ppgtt.h"
@@ -79,6 +80,7 @@
 
 #include "pxp/intel_pxp.h"
 
+#include "i915_file_private.h"
 #include "i915_gem_context.h"
 #include "i915_trace.h"
 #include "i915_user_extensions.h"
@@ -343,6 +345,20 @@ static int proto_context_register(struct drm_i915_file_private *fpriv,
 	return ret;
 }
 
+static struct i915_address_space *
+i915_gem_vm_lookup(struct drm_i915_file_private *file_priv, u32 id)
+{
+	struct i915_address_space *vm;
+
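+	/*
+	 * Take the lookup and the reference under the xarray lock so the
+	 * vm cannot be unregistered and freed in between.
+	 */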
+	xa_lock(&file_priv->vm_xa);
+	vm = xa_load(&file_priv->vm_xa, id);
+	if (vm)
+		kref_get(&vm->ref);
+	xa_unlock(&file_priv->vm_xa);
+
+	return vm;
+}
+
 static int set_proto_ctx_vm(struct drm_i915_file_private *fpriv,
 			    struct i915_gem_proto_context *pc,
 			    const struct drm_i915_gem_context_param *args)
@@ -571,10 +587,6 @@ set_proto_ctx_engines_parallel_submit(struct i915_user_extension __user *base,
 	struct intel_engine_cs **siblings = NULL;
 	intel_engine_mask_t prev_mask;
 
-	/* FIXME: This is NIY for execlists */
-	if (!(intel_uc_uses_guc_submission(&to_gt(i915)->uc)))
-		return -ENODEV;
-
 	if (get_user(slot, &ext->engine_index))
 		return -EFAULT;
 
@@ -584,6 +596,13 @@ set_proto_ctx_engines_parallel_submit(struct i915_user_extension __user *base,
 	if (get_user(num_siblings, &ext->num_siblings))
 		return -EFAULT;
 
+	if (!intel_uc_uses_guc_submission(&to_gt(i915)->uc) &&
+	    num_siblings != 1) {
+		drm_dbg(&i915->drm, "Only 1 sibling (%d) supported in non-GuC mode\n",
+			num_siblings);
+		return -EINVAL;
+	}
+
 	if (slot >= set->num_engines) {
 		drm_dbg(&i915->drm, "Invalid placement value, %d >= %d\n",
 			slot, set->num_engines);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.h b/drivers/gpu/drm/i915/gem/i915_gem_context.h
index babfecb17ad1a763ecabfcc27878b42c9a6cf309..e5b0f66ea1feb4488e8490a6ca03a6e3be3d299c 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.h
@@ -174,7 +174,7 @@ i915_gem_context_get_eb_vm(struct i915_gem_context *ctx)
 
 	vm = ctx->vm;
 	if (!vm)
-		vm = &ctx->i915->ggtt.vm;
+		vm = &to_gt(ctx->i915)->ggtt->vm;
 	vm = i915_vm_get(vm);
 
 	return vm;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_create.c b/drivers/gpu/drm/i915/gem/i915_gem_create.c
index 9402d4bf4ffc5ffe8bc287497df01eedde9cda4a..c6eb023d3d8670a6a8c9cef3e400f7ac0f50bd6f 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_create.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_create.c
@@ -3,12 +3,15 @@
  * Copyright © 2020 Intel Corporation
  */
 
+#include <drm/drm_fourcc.h>
+
 #include "gem/i915_gem_ioctls.h"
 #include "gem/i915_gem_lmem.h"
 #include "gem/i915_gem_region.h"
 #include "pxp/intel_pxp.h"
 
 #include "i915_drv.h"
+#include "i915_gem_create.h"
 #include "i915_trace.h"
 #include "i915_user_extensions.h"
 
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_create.h b/drivers/gpu/drm/i915/gem/i915_gem_create.h
new file mode 100644
index 0000000000000000000000000000000000000000..9536aa906001436402aaf0cd095ea579a7f3af07
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_create.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#ifndef __I915_GEM_CREATE_H__
+#define __I915_GEM_CREATE_H__
+
+struct drm_file;
+struct drm_device;
+struct drm_mode_create_dumb;
+
+int i915_gem_dumb_create(struct drm_file *file_priv,
+			 struct drm_device *dev,
+			 struct drm_mode_create_dumb *args);
+
+#endif /* __I915_GEM_CREATE_H__ */
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
index 14fdb0796c5298ee263438eff3c61b9eab3b7d2c..13917231ae81a09e3b232d7d4b9fd5cb01f83f21 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
@@ -11,6 +11,7 @@
 
 #include <asm/smp.h>
 
+#include "gem/i915_gem_dmabuf.h"
 #include "i915_drv.h"
 #include "i915_gem_object.h"
 #include "i915_scatterlist.h"
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.h b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.h
new file mode 100644
index 0000000000000000000000000000000000000000..6e0405d47ce190d43aac6b9e7f6e6d288699b09b
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef __I915_GEM_DMABUF_H__
+#define __I915_GEM_DMABUF_H__
+
+struct drm_gem_object;
+struct drm_device;
+struct dma_buf;
+
+struct drm_gem_object *i915_gem_prime_import(struct drm_device *dev,
+					     struct dma_buf *dma_buf);
+
+struct dma_buf *i915_gem_prime_export(struct drm_gem_object *gem_obj, int flags);
+
+#endif /* __I915_GEM_DMABUF_H__ */
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.c b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
index 26532c07d467a5c91f71172e3634feaa7226bb15..3e5d6057b3ef91cf4a4a8d77cf13f06f1cf60361 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_domain.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
@@ -9,12 +9,13 @@
 
 #include "i915_drv.h"
 #include "i915_gem_clflush.h"
+#include "i915_gem_domain.h"
 #include "i915_gem_gtt.h"
 #include "i915_gem_ioctls.h"
-#include "i915_gem_object.h"
-#include "i915_vma.h"
 #include "i915_gem_lmem.h"
 #include "i915_gem_mman.h"
+#include "i915_gem_object.h"
+#include "i915_vma.h"
 
 static bool gpu_write_needs_clflush(struct drm_i915_gem_object *obj)
 {
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.h b/drivers/gpu/drm/i915/gem/i915_gem_domain.h
new file mode 100644
index 0000000000000000000000000000000000000000..9622df962bfcc67498adac23985020e2ff74fdbc
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_domain.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef __I915_GEM_DOMAIN_H__
+#define __I915_GEM_DOMAIN_H__
+
+struct drm_i915_gem_object;
+enum i915_cache_level;
+
+int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
+				    enum i915_cache_level cache_level);
+
+#endif /* __I915_GEM_DOMAIN_H__ */
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index 54cae821513bfe8865744bbd54cc473b0f9a966f..d42f437149c953577390f6efff9a60bdb2c62926 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -25,13 +25,13 @@
 
 #include "i915_cmd_parser.h"
 #include "i915_drv.h"
+#include "i915_file_private.h"
 #include "i915_gem_clflush.h"
 #include "i915_gem_context.h"
 #include "i915_gem_evict.h"
 #include "i915_gem_ioctls.h"
 #include "i915_trace.h"
 #include "i915_user_extensions.h"
-#include "i915_vma_snapshot.h"
 
 struct eb_vma {
 	struct i915_vma *vma;
@@ -443,7 +443,7 @@ eb_pin_vma(struct i915_execbuffer *eb,
 	else
 		pin_flags = entry->offset & PIN_OFFSET_MASK;
 
-	pin_flags |= PIN_USER | PIN_NOEVICT | PIN_OFFSET_FIXED;
+	pin_flags |= PIN_USER | PIN_NOEVICT | PIN_OFFSET_FIXED | PIN_VALIDATE;
 	if (unlikely(ev->flags & EXEC_OBJECT_NEEDS_GTT))
 		pin_flags |= PIN_GLOBAL;
 
@@ -461,17 +461,15 @@ eb_pin_vma(struct i915_execbuffer *eb,
 					     entry->pad_to_size,
 					     entry->alignment,
 					     eb_pin_flags(entry, ev->flags) |
-					     PIN_USER | PIN_NOEVICT);
+					     PIN_USER | PIN_NOEVICT | PIN_VALIDATE);
 		if (unlikely(err))
 			return err;
 	}
 
 	if (unlikely(ev->flags & EXEC_OBJECT_NEEDS_FENCE)) {
 		err = i915_vma_pin_fence(vma);
-		if (unlikely(err)) {
-			i915_vma_unpin(vma);
+		if (unlikely(err))
 			return err;
-		}
 
 		if (vma->fence)
 			ev->flags |= __EXEC_OBJECT_HAS_FENCE;
@@ -487,13 +485,9 @@ eb_pin_vma(struct i915_execbuffer *eb,
 static inline void
 eb_unreserve_vma(struct eb_vma *ev)
 {
-	if (!(ev->flags & __EXEC_OBJECT_HAS_PIN))
-		return;
-
 	if (unlikely(ev->flags & __EXEC_OBJECT_HAS_FENCE))
 		__i915_vma_unpin_fence(ev->vma);
 
-	__i915_vma_unpin(ev->vma);
 	ev->flags &= ~__EXEC_OBJECT_RESERVED;
 }
 
@@ -675,10 +669,8 @@ static int eb_reserve_vma(struct i915_execbuffer *eb,
 
 	if (unlikely(ev->flags & EXEC_OBJECT_NEEDS_FENCE)) {
 		err = i915_vma_pin_fence(vma);
-		if (unlikely(err)) {
-			i915_vma_unpin(vma);
+		if (unlikely(err))
 			return err;
-		}
 
 		if (vma->fence)
 			ev->flags |= __EXEC_OBJECT_HAS_FENCE;
@@ -690,85 +682,95 @@ static int eb_reserve_vma(struct i915_execbuffer *eb,
 	return 0;
 }
 
-static int eb_reserve(struct i915_execbuffer *eb)
+static bool eb_unbind(struct i915_execbuffer *eb, bool force)
 {
 	const unsigned int count = eb->buffer_count;
-	unsigned int pin_flags = PIN_USER | PIN_NONBLOCK;
+	unsigned int i;
 	struct list_head last;
+	bool unpinned = false;
+
+	/* Resort *all* the objects into priority order */
+	INIT_LIST_HEAD(&eb->unbound);
+	INIT_LIST_HEAD(&last);
+
+	for (i = 0; i < count; i++) {
+		struct eb_vma *ev = &eb->vma[i];
+		unsigned int flags = ev->flags;
+
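+		/*
+		 * Unless forced, skip objects that already sit reserved at
+		 * their user-requested fixed offset.
+		 */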
+		if (!force && flags & EXEC_OBJECT_PINNED &&
+		    flags & __EXEC_OBJECT_HAS_PIN)
+			continue;
+
+		unpinned = true;
+		eb_unreserve_vma(ev);
+
+		if (flags & EXEC_OBJECT_PINNED)
+			/* Pinned must have their slot */
+			list_add(&ev->bind_link, &eb->unbound);
+		else if (flags & __EXEC_OBJECT_NEEDS_MAP)
+			/* Map requires the lowest 256MiB (aperture) */
+			list_add_tail(&ev->bind_link, &eb->unbound);
+		else if (!(flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS))
+			/* Prioritise 4GiB region for restricted bo */
+			list_add(&ev->bind_link, &last);
+		else
+			list_add_tail(&ev->bind_link, &last);
+	}
+
+	list_splice_tail(&last, &eb->unbound);
+	return unpinned;
+}
+
+static int eb_reserve(struct i915_execbuffer *eb)
+{
 	struct eb_vma *ev;
-	unsigned int i, pass;
+	unsigned int pass;
 	int err = 0;
+	bool unpinned;
 
 	/*
 	 * Attempt to pin all of the buffers into the GTT.
-	 * This is done in 3 phases:
+	 * This is done in 2 phases:
 	 *
-	 * 1a. Unbind all objects that do not match the GTT constraints for
-	 *     the execbuffer (fenceable, mappable, alignment etc).
-	 * 1b. Increment pin count for already bound objects.
-	 * 2.  Bind new objects.
-	 * 3.  Decrement pin count.
+	 * 1. Unbind all objects that do not match the GTT constraints for
+	 *    the execbuffer (fenceable, mappable, alignment etc).
+	 * 2. Bind new objects.
 	 *
 	 * This avoid unnecessary unbinding of later objects in order to make
 	 * room for the earlier objects *unless* we need to defragment.
+	 *
+	 * Defragmenting is skipped if all objects are pinned at a fixed location.
 	 */
-	pass = 0;
-	do {
-		list_for_each_entry(ev, &eb->unbound, bind_link) {
-			err = eb_reserve_vma(eb, ev, pin_flags);
-			if (err)
-				break;
-		}
-		if (err != -ENOSPC)
-			return err;
+	for (pass = 0; pass <= 2; pass++) {
+		int pin_flags = PIN_USER | PIN_VALIDATE;
 
-		/* Resort *all* the objects into priority order */
-		INIT_LIST_HEAD(&eb->unbound);
-		INIT_LIST_HEAD(&last);
-		for (i = 0; i < count; i++) {
-			unsigned int flags;
+		if (pass == 0)
+			pin_flags |= PIN_NONBLOCK;
 
-			ev = &eb->vma[i];
-			flags = ev->flags;
-			if (flags & EXEC_OBJECT_PINNED &&
-			    flags & __EXEC_OBJECT_HAS_PIN)
-				continue;
+		if (pass >= 1)
+			unpinned = eb_unbind(eb, pass == 2);
 
-			eb_unreserve_vma(ev);
-
-			if (flags & EXEC_OBJECT_PINNED)
-				/* Pinned must have their slot */
-				list_add(&ev->bind_link, &eb->unbound);
-			else if (flags & __EXEC_OBJECT_NEEDS_MAP)
-				/* Map require the lowest 256MiB (aperture) */
-				list_add_tail(&ev->bind_link, &eb->unbound);
-			else if (!(flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS))
-				/* Prioritise 4GiB region for restricted bo */
-				list_add(&ev->bind_link, &last);
-			else
-				list_add_tail(&ev->bind_link, &last);
-		}
-		list_splice_tail(&last, &eb->unbound);
-
-		switch (pass++) {
-		case 0:
-			break;
-
-		case 1:
-			/* Too fragmented, unbind everything and retry */
-			mutex_lock(&eb->context->vm->mutex);
-			err = i915_gem_evict_vm(eb->context->vm);
-			mutex_unlock(&eb->context->vm->mutex);
+		if (pass == 2) {
+			err = mutex_lock_interruptible(&eb->context->vm->mutex);
+			if (!err) {
+				err = i915_gem_evict_vm(eb->context->vm, &eb->ww);
+				mutex_unlock(&eb->context->vm->mutex);
+			}
 			if (err)
 				return err;
-			break;
+		}
 
-		default:
-			return -ENOSPC;
+		list_for_each_entry(ev, &eb->unbound, bind_link) {
+			err = eb_reserve_vma(eb, ev, pin_flags);
+			if (err)
+				break;
 		}
 
-		pin_flags = PIN_USER;
-	} while (1);
+		if (err != -ENOSPC)
+			break;
+	}
+
+	return err;
 }
 
 static int eb_select_context(struct i915_execbuffer *eb)
@@ -1097,7 +1099,7 @@ static inline struct i915_ggtt *cache_to_ggtt(struct reloc_cache *cache)
 {
 	struct drm_i915_private *i915 =
 		container_of(cache, struct i915_execbuffer, reloc_cache)->i915;
-	return &i915->ggtt;
+	return to_gt(i915)->ggtt;
 }
 
 static void reloc_cache_unmap(struct reloc_cache *cache)
@@ -1216,10 +1218,11 @@ static void *reloc_kmap(struct drm_i915_gem_object *obj,
 	return vaddr;
 }
 
-static void *reloc_iomap(struct drm_i915_gem_object *obj,
+static void *reloc_iomap(struct i915_vma *batch,
 			 struct i915_execbuffer *eb,
 			 unsigned long page)
 {
+	struct drm_i915_gem_object *obj = batch->obj;
 	struct reloc_cache *cache = &eb->reloc_cache;
 	struct i915_ggtt *ggtt = cache_to_ggtt(cache);
 	unsigned long offset;
@@ -1229,7 +1232,7 @@ static void *reloc_iomap(struct drm_i915_gem_object *obj,
 		intel_gt_flush_ggtt_writes(ggtt->vm.gt);
 		io_mapping_unmap_atomic((void __force __iomem *) unmask_page(cache->vaddr));
 	} else {
-		struct i915_vma *vma;
+		struct i915_vma *vma = ERR_PTR(-ENODEV);
 		int err;
 
 		if (i915_gem_object_is_tiled(obj))
@@ -1242,10 +1245,23 @@ static void *reloc_iomap(struct drm_i915_gem_object *obj,
 		if (err)
 			return ERR_PTR(err);
 
-		vma = i915_gem_object_ggtt_pin_ww(obj, &eb->ww, NULL, 0, 0,
-						  PIN_MAPPABLE |
-						  PIN_NONBLOCK /* NOWARN */ |
-						  PIN_NOEVICT);
+		/*
+		 * i915_gem_object_ggtt_pin_ww may attempt to remove the batch
+		 * VMA from the object list because we no longer pin.
+		 *
+		 * Only attempt to pin the batch buffer into the GGTT if it does
+		 * not already live there; otherwise reuse the existing GGTT
+		 * binding, provided it is not misplaced (map and fenceable).
+		 */
+		if (!i915_is_ggtt(batch->vm)) {
+			vma = i915_gem_object_ggtt_pin_ww(obj, &eb->ww, NULL, 0, 0,
+							  PIN_MAPPABLE |
+							  PIN_NONBLOCK /* NOWARN */ |
+							  PIN_NOEVICT);
+		} else if (i915_vma_is_map_and_fenceable(batch)) {
+			__i915_vma_pin(batch);
+			vma = batch;
+		}
+
 		if (vma == ERR_PTR(-EDEADLK))
 			return vma;
 
@@ -1283,7 +1299,7 @@ static void *reloc_iomap(struct drm_i915_gem_object *obj,
 	return vaddr;
 }
 
-static void *reloc_vaddr(struct drm_i915_gem_object *obj,
+static void *reloc_vaddr(struct i915_vma *vma,
 			 struct i915_execbuffer *eb,
 			 unsigned long page)
 {
@@ -1295,9 +1311,9 @@ static void *reloc_vaddr(struct drm_i915_gem_object *obj,
 	} else {
 		vaddr = NULL;
 		if ((cache->vaddr & KMAP) == 0)
-			vaddr = reloc_iomap(obj, eb, page);
+			vaddr = reloc_iomap(vma, eb, page);
 		if (!vaddr)
-			vaddr = reloc_kmap(obj, cache, page);
+			vaddr = reloc_kmap(vma->obj, cache, page);
 	}
 
 	return vaddr;
@@ -1338,7 +1354,7 @@ relocate_entry(struct i915_vma *vma,
 	void *vaddr;
 
 repeat:
-	vaddr = reloc_vaddr(vma->obj, eb,
+	vaddr = reloc_vaddr(vma, eb,
 			    offset >> PAGE_SHIFT);
 	if (IS_ERR(vaddr))
 		return PTR_ERR(vaddr);
@@ -1413,7 +1429,7 @@ eb_relocate_entry(struct i915_execbuffer *eb,
 			mutex_lock(&vma->vm->mutex);
 			err = i915_vma_bind(target->vma,
 					    target->vma->obj->cache_level,
-					    PIN_GLOBAL, NULL);
+					    PIN_GLOBAL, NULL, NULL);
 			mutex_unlock(&vma->vm->mutex);
 			reloc_cache_remap(&eb->reloc_cache, ev->vma->obj);
 			if (err)
@@ -1943,7 +1959,6 @@ static void eb_capture_stage(struct i915_execbuffer *eb)
 {
 	const unsigned int count = eb->buffer_count;
 	unsigned int i = count, j;
-	struct i915_vma_snapshot *vsnap;
 
 	while (i--) {
 		struct eb_vma *ev = &eb->vma[i];
@@ -1953,11 +1968,6 @@ static void eb_capture_stage(struct i915_execbuffer *eb)
 		if (!(flags & EXEC_OBJECT_CAPTURE))
 			continue;
 
-		vsnap = i915_vma_snapshot_alloc(GFP_KERNEL);
-		if (!vsnap)
-			continue;
-
-		i915_vma_snapshot_init(vsnap, vma, "user");
 		for_each_batch_create_order(eb, j) {
 			struct i915_capture_list *capture;
 
@@ -1966,10 +1976,9 @@ static void eb_capture_stage(struct i915_execbuffer *eb)
 				continue;
 
 			capture->next = eb->capture_lists[j];
-			capture->vma_snapshot = i915_vma_snapshot_get(vsnap);
+			capture->vma_res = i915_vma_resource_get(vma->resource);
 			eb->capture_lists[j] = capture;
 		}
-		i915_vma_snapshot_put(vsnap);
 	}
 }
 
@@ -2200,7 +2209,7 @@ shadow_batch_pin(struct i915_execbuffer *eb,
 	if (IS_ERR(vma))
 		return vma;
 
-	err = i915_vma_pin_ww(vma, &eb->ww, 0, 0, flags);
+	err = i915_vma_pin_ww(vma, &eb->ww, 0, 0, flags | PIN_VALIDATE);
 	if (err)
 		return ERR_PTR(err);
 
@@ -2214,7 +2223,7 @@ static struct i915_vma *eb_dispatch_secure(struct i915_execbuffer *eb, struct i9
 	 * batch" bit. Hence we need to pin secure batches into the global gtt.
 	 * hsw should have this fixed, but bdw mucks it up again. */
 	if (eb->batch_flags & I915_DISPATCH_SECURE)
-		return i915_gem_object_ggtt_pin_ww(vma->obj, &eb->ww, NULL, 0, 0, 0);
+		return i915_gem_object_ggtt_pin_ww(vma->obj, &eb->ww, NULL, 0, 0, PIN_VALIDATE);
 
 	return NULL;
 }
@@ -2265,13 +2274,12 @@ static int eb_parse(struct i915_execbuffer *eb)
 
 	err = i915_gem_object_lock(pool->obj, &eb->ww);
 	if (err)
-		goto err;
+		return err;
 
 	shadow = shadow_batch_pin(eb, pool->obj, eb->context->vm, PIN_USER);
-	if (IS_ERR(shadow)) {
-		err = PTR_ERR(shadow);
-		goto err;
-	}
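+	/*
+	 * No unwinding needed on failure: PIN_VALIDATE does not leave
+	 * anything pinned beyond the ww transaction.
+	 */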
+	if (IS_ERR(shadow))
+		return PTR_ERR(shadow);
+
 	intel_gt_buffer_pool_mark_used(pool);
 	i915_gem_object_set_readonly(shadow->obj);
 	shadow->private = pool;
@@ -2283,25 +2291,21 @@ static int eb_parse(struct i915_execbuffer *eb)
 		shadow = shadow_batch_pin(eb, pool->obj,
 					  &eb->gt->ggtt->vm,
 					  PIN_GLOBAL);
-		if (IS_ERR(shadow)) {
-			err = PTR_ERR(shadow);
-			shadow = trampoline;
-			goto err_shadow;
-		}
+		if (IS_ERR(shadow))
+			return PTR_ERR(shadow);
+
 		shadow->private = pool;
 
 		eb->batch_flags |= I915_DISPATCH_SECURE;
 	}
 
 	batch = eb_dispatch_secure(eb, shadow);
-	if (IS_ERR(batch)) {
-		err = PTR_ERR(batch);
-		goto err_trampoline;
-	}
+	if (IS_ERR(batch))
+		return PTR_ERR(batch);
 
 	err = dma_resv_reserve_shared(shadow->obj->base.resv, 1);
 	if (err)
-		goto err_trampoline;
+		return err;
 
 	err = intel_engine_cmd_parser(eb->context->engine,
 				      eb->batches[0]->vma,
@@ -2309,7 +2313,7 @@ static int eb_parse(struct i915_execbuffer *eb)
 				      eb->batch_len[0],
 				      shadow, trampoline);
 	if (err)
-		goto err_unpin_batch;
+		return err;
 
 	eb->batches[0] = &eb->vma[eb->buffer_count++];
 	eb->batches[0]->vma = i915_vma_get(shadow);
@@ -2328,17 +2332,6 @@ static int eb_parse(struct i915_execbuffer *eb)
 		eb->batches[0]->vma = i915_vma_get(batch);
 	}
 	return 0;
-
-err_unpin_batch:
-	if (batch)
-		i915_vma_unpin(batch);
-err_trampoline:
-	if (trampoline)
-		i915_vma_unpin(trampoline);
-err_shadow:
-	i915_vma_unpin(shadow);
-err:
-	return err;
 }
 
 static int eb_request_submit(struct i915_execbuffer *eb,
@@ -3277,9 +3270,8 @@ eb_requests_create(struct i915_execbuffer *eb, struct dma_fence *in_fence,
 		 * _onstack interface.
 		 */
 		if (eb->batches[i]->vma)
-			i915_vma_snapshot_init_onstack(&eb->requests[i]->batch_snapshot,
-						       eb->batches[i]->vma,
-						       "batch");
+			eb->requests[i]->batch_res =
+				i915_vma_resource_get(eb->batches[i]->vma->resource);
 		if (eb->batch_pool) {
 			GEM_BUG_ON(intel_context_is_parallel(eb->context));
 			intel_gt_buffer_pool_mark_active(eb->batch_pool,
@@ -3464,8 +3456,6 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 
 err_vma:
 	eb_release_vmas(&eb, true);
-	if (eb.trampoline)
-		i915_vma_unpin(eb.trampoline);
 	WARN_ON(err == -EDEADLK);
 	i915_gem_ww_ctx_fini(&eb.ww);
 
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_internal.c b/drivers/gpu/drm/i915/gem/i915_gem_internal.c
index c5150a1ee3d22fea1f9fadfa47133a4f79009043..c698f95af15fee0af3b5f04dd06799643474eed3 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_internal.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_internal.c
@@ -10,6 +10,7 @@
 
 #include "i915_drv.h"
 #include "i915_gem.h"
+#include "i915_gem_internal.h"
 #include "i915_gem_object.h"
 #include "i915_scatterlist.h"
 #include "i915_utils.h"
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_internal.h b/drivers/gpu/drm/i915/gem/i915_gem_internal.h
new file mode 100644
index 0000000000000000000000000000000000000000..6664e06112fc3989dfb78dc42029702b9d317e1a
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_internal.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef __I915_GEM_INTERNAL_H__
+#define __I915_GEM_INTERNAL_H__
+
+#include <linux/types.h>
+
+struct drm_i915_gem_object;
+struct drm_i915_gem_object_ops;
+struct drm_i915_private;
+
+struct drm_i915_gem_object *
+i915_gem_object_create_internal(struct drm_i915_private *i915,
+				phys_addr_t size);
+struct drm_i915_gem_object *
+__i915_gem_object_create_internal(struct drm_i915_private *i915,
+				  const struct drm_i915_gem_object_ops *ops,
+				  phys_addr_t size);
+
+#endif /* __I915_GEM_INTERNAL_H__ */
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
index 1478c02a82cbef49bbbe02163f3193b04c8920dc..efe69d6b86f438565415d1f9cb12f885255a9fa5 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
@@ -9,10 +9,13 @@
 #include <linux/pfn_t.h>
 #include <linux/sizes.h>
 
+#include <drm/drm_cache.h>
+
 #include "gt/intel_gt.h"
 #include "gt/intel_gt_requests.h"
 
 #include "i915_drv.h"
+#include "i915_gem_evict.h"
 #include "i915_gem_gtt.h"
 #include "i915_gem_ioctls.h"
 #include "i915_gem_object.h"
@@ -295,7 +298,7 @@ static vm_fault_t vm_fault_gtt(struct vm_fault *vmf)
 	struct drm_device *dev = obj->base.dev;
 	struct drm_i915_private *i915 = to_i915(dev);
 	struct intel_runtime_pm *rpm = &i915->runtime_pm;
-	struct i915_ggtt *ggtt = &i915->ggtt;
+	struct i915_ggtt *ggtt = to_gt(i915)->ggtt;
 	bool write = area->vm_flags & VM_WRITE;
 	struct i915_gem_ww_ctx ww;
 	intel_wakeref_t wakeref;
@@ -358,8 +361,21 @@ static vm_fault_t vm_fault_gtt(struct vm_fault *vmf)
 			vma = i915_gem_object_ggtt_pin_ww(obj, &ww, &view, 0, 0, flags);
 		}
 
-		/* The entire mappable GGTT is pinned? Unexpected! */
-		GEM_BUG_ON(vma == ERR_PTR(-ENOSPC));
+		/*
+		 * The entire mappable GGTT is pinned? Unexpected!
+		 * Try to evict the object we locked too, as normally we skip it
+		 * due to the lack of short-term pinning inside execbuf.
+		 */
+		if (vma == ERR_PTR(-ENOSPC)) {
+			ret = mutex_lock_interruptible(&ggtt->vm.mutex);
+			if (!ret) {
+				ret = i915_gem_evict_vm(&ggtt->vm, &ww);
+				mutex_unlock(&ggtt->vm.mutex);
+			}
+			if (ret)
+				goto err_reset;
+			vma = i915_gem_object_ggtt_pin_ww(obj, &ww, &view, 0, 0, flags);
+		}
 	}
 	if (IS_ERR(vma)) {
 		ret = PTR_ERR(vma);
@@ -388,16 +404,16 @@ static vm_fault_t vm_fault_gtt(struct vm_fault *vmf)
 	assert_rpm_wakelock_held(rpm);
 
 	/* Mark as being mmapped into userspace for later revocation */
-	mutex_lock(&i915->ggtt.vm.mutex);
+	mutex_lock(&to_gt(i915)->ggtt->vm.mutex);
 	if (!i915_vma_set_userfault(vma) && !obj->userfault_count++)
-		list_add(&obj->userfault_link, &i915->ggtt.userfault_list);
-	mutex_unlock(&i915->ggtt.vm.mutex);
+		list_add(&obj->userfault_link, &to_gt(i915)->ggtt->userfault_list);
+	mutex_unlock(&to_gt(i915)->ggtt->vm.mutex);
 
 	/* Track the mmo associated with the fenced vma */
 	vma->mmo = mmo;
 
 	if (CONFIG_DRM_I915_USERFAULT_AUTOSUSPEND)
-		intel_wakeref_auto(&i915->ggtt.userfault_wakeref,
+		intel_wakeref_auto(&to_gt(i915)->ggtt->userfault_wakeref,
 				   msecs_to_jiffies_timeout(CONFIG_DRM_I915_USERFAULT_AUTOSUSPEND));
 
 	if (write) {
@@ -512,7 +528,7 @@ void i915_gem_object_release_mmap_gtt(struct drm_i915_gem_object *obj)
 	 * wakeref.
 	 */
 	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
-	mutex_lock(&i915->ggtt.vm.mutex);
+	mutex_lock(&to_gt(i915)->ggtt->vm.mutex);
 
 	if (!obj->userfault_count)
 		goto out;
@@ -530,7 +546,7 @@ void i915_gem_object_release_mmap_gtt(struct drm_i915_gem_object *obj)
 	wmb();
 
 out:
-	mutex_unlock(&i915->ggtt.vm.mutex);
+	mutex_unlock(&to_gt(i915)->ggtt->vm.mutex);
 	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
 }
 
@@ -736,13 +752,14 @@ i915_gem_dumb_mmap_offset(struct drm_file *file,
 			  u32 handle,
 			  u64 *offset)
 {
+	struct drm_i915_private *i915 = to_i915(dev);
 	enum i915_mmap_type mmap_type;
 
 	if (HAS_LMEM(to_i915(dev)))
 		mmap_type = I915_MMAP_TYPE_FIXED;
 	else if (pat_enabled())
 		mmap_type = I915_MMAP_TYPE_WC;
-	else if (!i915_ggtt_has_aperture(&to_i915(dev)->ggtt))
+	else if (!i915_ggtt_has_aperture(to_gt(i915)->ggtt))
 		return -ENODEV;
 	else
 		mmap_type = I915_MMAP_TYPE_GTT;
@@ -790,7 +807,7 @@ i915_gem_mmap_offset_ioctl(struct drm_device *dev, void *data,
 
 	switch (args->flags) {
 	case I915_MMAP_OFFSET_GTT:
-		if (!i915_ggtt_has_aperture(&i915->ggtt))
+		if (!i915_ggtt_has_aperture(to_gt(i915)->ggtt))
 			return -ENODEV;
 		type = I915_MMAP_TYPE_GTT;
 		break;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c
index d87b508b59b1a980b9dd3a3c51d1b4f3192d10d1..2d593d573ef1feb94679dbe1b6bad25131e7e9a6 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c
@@ -24,11 +24,16 @@
 
 #include <linux/sched/mm.h>
 
+#include <drm/drm_cache.h>
+
 #include "display/intel_frontbuffer.h"
 #include "pxp/intel_pxp.h"
+
 #include "i915_drv.h"
+#include "i915_file_private.h"
 #include "i915_gem_clflush.h"
 #include "i915_gem_context.h"
+#include "i915_gem_dmabuf.h"
 #include "i915_gem_mman.h"
 #include "i915_gem_object.h"
 #include "i915_gem_ttm.h"
@@ -280,6 +285,12 @@ void __i915_gem_object_pages_fini(struct drm_i915_gem_object *obj)
 			GEM_BUG_ON(vma->obj != obj);
 			spin_unlock(&obj->vma.lock);
 
+			/* Force the vma to be unbound under the vm mutex. */
+			mutex_lock(&vma->vm->mutex);
+			atomic_and(~I915_VMA_PIN_MASK, &vma->flags);
+			__i915_vma_unbind(vma);
+			mutex_unlock(&vma->vm->mutex);
+
 			__i915_vma_put(vma);
 
 			spin_lock(&obj->vma.lock);
@@ -756,6 +767,18 @@ i915_gem_object_get_moving_fence(struct drm_i915_gem_object *obj)
 	return dma_fence_get(i915_gem_to_ttm(obj)->moving);
 }
 
+void i915_gem_object_set_moving_fence(struct drm_i915_gem_object *obj,
+				      struct dma_fence *fence)
+{
+	struct dma_fence **moving = &i915_gem_to_ttm(obj)->moving;
+
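+	/* Replace the previous moving fence, adjusting the fence references. */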
+	if (*moving == fence)
+		return;
+
+	dma_fence_put(*moving);
+	*moving = dma_fence_get(fence);
+}
+
 /**
  * i915_gem_object_wait_moving_fence - Wait for the object's moving fence if any
  * @obj: The object whose moving fence to wait for.
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h
index f66d46882ea7892fb48b5b3f317b75035568bf3c..02c37fe4a535f9d546f1e8587a5d6c3fb4b3f296 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h
@@ -459,7 +459,6 @@ i915_gem_object_unpin_pages(struct drm_i915_gem_object *obj)
 
 int __i915_gem_object_put_pages(struct drm_i915_gem_object *obj);
 int i915_gem_object_truncate(struct drm_i915_gem_object *obj);
-void i915_gem_object_writeback(struct drm_i915_gem_object *obj);
 
 /**
  * i915_gem_object_pin_map - return a contiguous mapping of the entire object
@@ -524,6 +523,9 @@ i915_gem_object_finish_access(struct drm_i915_gem_object *obj)
 struct dma_fence *
 i915_gem_object_get_moving_fence(struct drm_i915_gem_object *obj);
 
+void i915_gem_object_set_moving_fence(struct drm_i915_gem_object *obj,
+				      struct dma_fence *fence);
+
 int i915_gem_object_wait_moving_fence(struct drm_i915_gem_object *obj,
 				      bool intr);
 
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
index 0dd107dcecc29075172ed2b92373598e518406db..0098a32490f06df6ef16ec0cc999ecdfb8870d40 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
@@ -15,6 +15,7 @@
 
 #include "i915_active.h"
 #include "i915_selftest.h"
+#include "i915_vma_resource.h"
 
 struct drm_i915_gem_object;
 struct intel_fronbuffer;
@@ -57,10 +58,26 @@ struct drm_i915_gem_object_ops {
 	void (*put_pages)(struct drm_i915_gem_object *obj,
 			  struct sg_table *pages);
 	int (*truncate)(struct drm_i915_gem_object *obj);
-	void (*writeback)(struct drm_i915_gem_object *obj);
-	int (*shrinker_release_pages)(struct drm_i915_gem_object *obj,
-				      bool no_gpu_wait,
-				      bool should_writeback);
+	/**
+	 * shrink - Perform further backend-specific actions to facilitate
+	 * shrinking.
+	 * @obj: The gem object
+	 * @flags: Extra flags to control shrinking behaviour in the backend
+	 *
+	 * Possible values for @flags:
+	 *
+	 * I915_GEM_OBJECT_SHRINK_WRITEBACK - Try to perform writeback of the
+	 * backing pages, if supported.
+	 *
+	 * I915_GEM_OBJECT_SHRINK_NO_GPU_WAIT - Don't wait for the object to
+	 * idle.  Active objects can be considered later. The TTM backend for
+	 * example might have async migrations going on, which don't use any
+	 * i915_vma to track the active GTT binding, and hence having an unbound
+	 * object might not be enough.
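+	 *
+	 * Returns 0 on success, negative error code on failure.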
+	 */
+#define I915_GEM_OBJECT_SHRINK_WRITEBACK   BIT(0)
+#define I915_GEM_OBJECT_SHRINK_NO_GPU_WAIT BIT(1)
+	int (*shrink)(struct drm_i915_gem_object *obj, unsigned int flags);
 
 	int (*pread)(struct drm_i915_gem_object *obj,
 		     const struct drm_i915_gem_pread *arg);
@@ -551,31 +568,7 @@ struct drm_i915_gem_object {
 		struct sg_table *pages;
 		void *mapping;
 
-		struct i915_page_sizes {
-			/**
-			 * The sg mask of the pages sg_table. i.e the mask of
-			 * of the lengths for each sg entry.
-			 */
-			unsigned int phys;
-
-			/**
-			 * The gtt page sizes we are allowed to use given the
-			 * sg mask and the supported page sizes. This will
-			 * express the smallest unit we can use for the whole
-			 * object, as well as the larger sizes we may be able
-			 * to use opportunistically.
-			 */
-			unsigned int sg;
-
-			/**
-			 * The actual gtt page size usage. Since we can have
-			 * multiple vma associated with this object we need to
-			 * prevent any trampling of state, hence a copy of this
-			 * struct also lives in each vma, therefore the gtt
-			 * value here should only be read/write through the vma.
-			 */
-			unsigned int gtt;
-		} page_sizes;
+		struct i915_page_sizes page_sizes;
 
 		I915_SELFTEST_DECLARE(unsigned int page_mask);
 
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pages.c b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
index a50f884973bc1a847b41de98d040b14c48f20cdf..183b861620b82339a673bf5187331c0f96d4ea0a 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_pages.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
@@ -4,6 +4,8 @@
  * Copyright © 2014-2016 Intel Corporation
  */
 
+#include <drm/drm_cache.h>
+
 #include "i915_drv.h"
 #include "i915_gem_object.h"
 #include "i915_scatterlist.h"
@@ -169,16 +171,6 @@ int i915_gem_object_truncate(struct drm_i915_gem_object *obj)
 	return 0;
 }
 
-/* Try to discard unwanted pages */
-void i915_gem_object_writeback(struct drm_i915_gem_object *obj)
-{
-	assert_object_held_shared(obj);
-	GEM_BUG_ON(i915_gem_object_has_pages(obj));
-
-	if (obj->ops->writeback)
-		obj->ops->writeback(obj);
-}
-
 static void __i915_gem_object_reset_page_iter(struct drm_i915_gem_object *obj)
 {
 	struct radix_tree_iter iter;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.c b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
index ac56124760e18172acdaf742b40a803271014ba6..00359ec9d58b0535c274e1ab139ea1f023a50e31 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_pm.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
@@ -10,6 +10,7 @@
 #include "gt/intel_gt_pm.h"
 #include "gt/intel_gt_requests.h"
 
+#include "i915_driver.h"
 #include "i915_drv.h"
 
 #if defined(CONFIG_X86)
@@ -23,7 +24,7 @@ void i915_gem_suspend(struct drm_i915_private *i915)
 {
 	GEM_TRACE("%s\n", dev_name(i915->drm.dev));
 
-	intel_wakeref_auto(&i915->ggtt.userfault_wakeref, 0);
+	intel_wakeref_auto(&to_gt(i915)->ggtt->userfault_wakeref, 0);
 	flush_workqueue(i915->wq);
 
 	/*
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
index cc9fe258fba73cf48a9a04a1ead837693f094ede..4efa821f3cb19f2c6f311d39598de6ad2d37931e 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
@@ -5,8 +5,11 @@
  */
 
 #include <linux/pagevec.h>
+#include <linux/shmem_fs.h>
 #include <linux/swap.h>
 
+#include <drm/drm_cache.h>
+
 #include "gem/i915_gem_region.h"
 #include "i915_drv.h"
 #include "i915_gemfs.h"
@@ -331,6 +334,21 @@ shmem_writeback(struct drm_i915_gem_object *obj)
 	__shmem_writeback(obj->base.size, obj->base.filp->f_mapping);
 }
 
+static int shmem_shrink(struct drm_i915_gem_object *obj, unsigned int flags)
+{
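+	/* Objects userspace marked as unneeded can simply be truncated. */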
+	switch (obj->mm.madv) {
+	case I915_MADV_DONTNEED:
+		return i915_gem_object_truncate(obj);
+	case __I915_MADV_PURGED:
+		return 0;
+	}
+
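+	/* For pages we still need, only write back to swap when asked to. */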
+	if (flags & I915_GEM_OBJECT_SHRINK_WRITEBACK)
+		shmem_writeback(obj);
+
+	return 0;
+}
+
 void
 __i915_gem_object_release_shmem(struct drm_i915_gem_object *obj,
 				struct sg_table *pages,
@@ -503,7 +521,7 @@ const struct drm_i915_gem_object_ops i915_gem_shmem_ops = {
 	.get_pages = shmem_get_pages,
 	.put_pages = shmem_put_pages,
 	.truncate = shmem_truncate,
-	.writeback = shmem_writeback,
+	.shrink = shmem_shrink,
 
 	.pwrite = shmem_pwrite,
 	.pread = shmem_pread,
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
index cc927e49d21f38a5f94be620f8845d480e8a029e..6a6ff98a87462b388f8af950fcc1b0282e656ade 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
@@ -57,21 +57,17 @@ static int drop_pages(struct drm_i915_gem_object *obj,
 
 static int try_to_writeback(struct drm_i915_gem_object *obj, unsigned int flags)
 {
-	if (obj->ops->shrinker_release_pages)
-		return obj->ops->shrinker_release_pages(obj,
-							!(flags & I915_SHRINK_ACTIVE),
-							flags & I915_SHRINK_WRITEBACK);
-
-	switch (obj->mm.madv) {
-	case I915_MADV_DONTNEED:
-		i915_gem_object_truncate(obj);
-		return 0;
-	case __I915_MADV_PURGED:
-		return 0;
-	}
+	if (obj->ops->shrink) {
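+		/* Translate shrinker control flags into backend shrink flags. */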
+		unsigned int shrink_flags = 0;
+
+		if (!(flags & I915_SHRINK_ACTIVE))
+			shrink_flags |= I915_GEM_OBJECT_SHRINK_NO_GPU_WAIT;
 
-	if (flags & I915_SHRINK_WRITEBACK)
-		i915_gem_object_writeback(obj);
+		if (flags & I915_SHRINK_WRITEBACK)
+			shrink_flags |= I915_GEM_OBJECT_SHRINK_WRITEBACK;
+
+		return obj->ops->shrink(obj, shrink_flags);
+	}
 
 	return 0;
 }
@@ -401,9 +397,9 @@ i915_gem_shrinker_vmap(struct notifier_block *nb, unsigned long event, void *ptr
 					       I915_SHRINK_VMAPS);
 
 	/* We also want to clear any cached iomaps as they wrap vmap */
-	mutex_lock(&i915->ggtt.vm.mutex);
+	mutex_lock(&to_gt(i915)->ggtt->vm.mutex);
 	list_for_each_entry_safe(vma, next,
-				 &i915->ggtt.vm.bound_list, vm_link) {
+				 &to_gt(i915)->ggtt->vm.bound_list, vm_link) {
 		unsigned long count = vma->node.size >> PAGE_SHIFT;
 		struct drm_i915_gem_object *obj = vma->obj;
 
@@ -418,7 +414,7 @@ i915_gem_shrinker_vmap(struct notifier_block *nb, unsigned long event, void *ptr
 
 		i915_gem_object_unlock(obj);
 	}
-	mutex_unlock(&i915->ggtt.vm.mutex);
+	mutex_unlock(&to_gt(i915)->ggtt->vm.mutex);
 
 	*(unsigned long *)ptr += freed_pages;
 	return NOTIFY_DONE;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
index 4dfed34191c67e538583539c2c534aaba75a2ce3..b9c3196b91cab8a441dcd938c3d733b895905bfb 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
@@ -16,6 +16,7 @@
 #include "i915_gem_stolen.h"
 #include "i915_reg.h"
 #include "i915_vgpu.h"
+#include "intel_mchbar_regs.h"
 
 /*
  * The BIOS typically reserves some of the system's memory for the exclusive
@@ -72,7 +73,7 @@ void i915_gem_stolen_remove_node(struct drm_i915_private *i915,
 static int i915_adjust_stolen(struct drm_i915_private *i915,
 			      struct resource *dsm)
 {
-	struct i915_ggtt *ggtt = &i915->ggtt;
+	struct i915_ggtt *ggtt = to_gt(i915)->ggtt;
 	struct intel_uncore *uncore = ggtt->vm.gt->uncore;
 	struct resource *r;
 
@@ -583,6 +584,7 @@ i915_pages_create_for_stolen(struct drm_device *dev,
 
 static int i915_gem_object_get_pages_stolen(struct drm_i915_gem_object *obj)
 {
+	struct drm_i915_private *i915 = to_i915(obj->base.dev);
 	struct sg_table *pages =
 		i915_pages_create_for_stolen(obj->base.dev,
 					     obj->stolen->start,
@@ -590,7 +592,7 @@ static int i915_gem_object_get_pages_stolen(struct drm_i915_gem_object *obj)
 	if (IS_ERR(pages))
 		return PTR_ERR(pages);
 
-	dbg_poison(&to_i915(obj->base.dev)->ggtt,
+	dbg_poison(to_gt(i915)->ggtt,
 		   sg_dma_address(pages->sgl),
 		   sg_dma_len(pages->sgl),
 		   POISON_INUSE);
@@ -603,9 +605,10 @@ static int i915_gem_object_get_pages_stolen(struct drm_i915_gem_object *obj)
 static void i915_gem_object_put_pages_stolen(struct drm_i915_gem_object *obj,
 					     struct sg_table *pages)
 {
+	struct drm_i915_private *i915 = to_i915(obj->base.dev);
 	/* Should only be called from i915_gem_object_release_stolen() */
 
-	dbg_poison(&to_i915(obj->base.dev)->ggtt,
+	dbg_poison(to_gt(i915)->ggtt,
 		   sg_dma_address(pages->sgl),
 		   sg_dma_len(pages->sgl),
 		   POISON_FREE);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_throttle.c b/drivers/gpu/drm/i915/gem/i915_gem_throttle.c
index 75501db71041232ff0eb35044283aa80c6525d30..af85d0c2816865a8486dc26f4e86b55475de824c 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_throttle.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_throttle.c
@@ -9,6 +9,7 @@
 #include <drm/drm_file.h>
 
 #include "i915_drv.h"
+#include "i915_file_private.h"
 #include "i915_gem_context.h"
 #include "i915_gem_ioctls.h"
 #include "i915_gem_object.h"
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_tiling.c b/drivers/gpu/drm/i915/gem/i915_gem_tiling.c
index 0e0e4805161a80cfbe6506e29f4477b7868d28ed..d6adda5bf96b76f03bc0eefd3f883dd694ab90b6 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_tiling.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_tiling.c
@@ -183,7 +183,8 @@ static int
 i915_gem_object_fence_prepare(struct drm_i915_gem_object *obj,
 			      int tiling_mode, unsigned int stride)
 {
-	struct i915_ggtt *ggtt = &to_i915(obj->base.dev)->ggtt;
+	struct drm_i915_private *i915 = to_i915(obj->base.dev);
+	struct i915_ggtt *ggtt = to_gt(i915)->ggtt;
 	struct i915_vma *vma, *vn;
 	LIST_HEAD(unbind);
 	int ret = 0;
@@ -338,7 +339,7 @@ i915_gem_set_tiling_ioctl(struct drm_device *dev, void *data,
 	struct drm_i915_gem_object *obj;
 	int err;
 
-	if (!dev_priv->ggtt.num_fences)
+	if (!to_gt(dev_priv)->ggtt->num_fences)
 		return -EOPNOTSUPP;
 
 	obj = i915_gem_object_lookup(file, args->handle);
@@ -364,9 +365,9 @@ i915_gem_set_tiling_ioctl(struct drm_device *dev, void *data,
 		args->stride = 0;
 	} else {
 		if (args->tiling_mode == I915_TILING_X)
-			args->swizzle_mode = to_i915(dev)->ggtt.bit_6_swizzle_x;
+			args->swizzle_mode = to_gt(dev_priv)->ggtt->bit_6_swizzle_x;
 		else
-			args->swizzle_mode = to_i915(dev)->ggtt.bit_6_swizzle_y;
+			args->swizzle_mode = to_gt(dev_priv)->ggtt->bit_6_swizzle_y;
 
 		/* Hide bit 17 swizzling from the user.  This prevents old Mesa
 		 * from aborting the application on sw fallbacks to bit 17,
@@ -421,7 +422,7 @@ i915_gem_get_tiling_ioctl(struct drm_device *dev, void *data,
 	struct drm_i915_gem_object *obj;
 	int err = -ENOENT;
 
-	if (!dev_priv->ggtt.num_fences)
+	if (!to_gt(dev_priv)->ggtt->num_fences)
 		return -EOPNOTSUPP;
 
 	rcu_read_lock();
@@ -437,10 +438,10 @@ i915_gem_get_tiling_ioctl(struct drm_device *dev, void *data,
 
 	switch (args->tiling_mode) {
 	case I915_TILING_X:
-		args->swizzle_mode = dev_priv->ggtt.bit_6_swizzle_x;
+		args->swizzle_mode = to_gt(dev_priv)->ggtt->bit_6_swizzle_x;
 		break;
 	case I915_TILING_Y:
-		args->swizzle_mode = dev_priv->ggtt.bit_6_swizzle_y;
+		args->swizzle_mode = to_gt(dev_priv)->ggtt->bit_6_swizzle_y;
 		break;
 	default:
 	case I915_TILING_NONE:
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
index de3fe79b665a111bb34085e49dc5230b2a1356f0..8419096d40565e42d4a8cd68fdc500c1d4864be9 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
@@ -3,6 +3,8 @@
  * Copyright © 2021 Intel Corporation
  */
 
+#include <linux/shmem_fs.h>
+
 #include <drm/ttm/ttm_bo_driver.h>
 #include <drm/ttm/ttm_placement.h>
 
@@ -424,16 +426,14 @@ int i915_ttm_purge(struct drm_i915_gem_object *obj)
 	return 0;
 }
 
-static int i915_ttm_shrinker_release_pages(struct drm_i915_gem_object *obj,
-					   bool no_wait_gpu,
-					   bool should_writeback)
+static int i915_ttm_shrink(struct drm_i915_gem_object *obj, unsigned int flags)
 {
 	struct ttm_buffer_object *bo = i915_gem_to_ttm(obj);
 	struct i915_ttm_tt *i915_tt =
 		container_of(bo->ttm, typeof(*i915_tt), ttm);
 	struct ttm_operation_ctx ctx = {
 		.interruptible = true,
-		.no_wait_gpu = no_wait_gpu,
+		.no_wait_gpu = flags & I915_GEM_OBJECT_SHRINK_NO_GPU_WAIT,
 	};
 	struct ttm_placement place = {};
 	int ret;
@@ -467,7 +467,7 @@ static int i915_ttm_shrinker_release_pages(struct drm_i915_gem_object *obj,
 		return ret;
 	}
 
-	if (should_writeback)
+	if (flags & I915_GEM_OBJECT_SHRINK_WRITEBACK)
 		__shmem_writeback(obj->base.size, i915_tt->filp->f_mapping);
 
 	return 0;
@@ -842,11 +842,9 @@ void i915_ttm_adjust_lru(struct drm_i915_gem_object *obj)
 	} else if (obj->mm.madv != I915_MADV_WILLNEED) {
 		bo->priority = I915_TTM_PRIO_PURGE;
 	} else if (!i915_gem_object_has_pages(obj)) {
-		if (bo->priority < I915_TTM_PRIO_HAS_PAGES)
-			bo->priority = I915_TTM_PRIO_HAS_PAGES;
+		bo->priority = I915_TTM_PRIO_NO_PAGES;
 	} else {
-		if (bo->priority > I915_TTM_PRIO_NO_PAGES)
-			bo->priority = I915_TTM_PRIO_NO_PAGES;
+		bo->priority = I915_TTM_PRIO_HAS_PAGES;
 	}
 
 	ttm_bo_move_to_lru_tail(bo, bo->resource, NULL);
@@ -977,7 +975,7 @@ static const struct drm_i915_gem_object_ops i915_gem_ttm_obj_ops = {
 	.get_pages = i915_ttm_get_pages,
 	.put_pages = i915_ttm_put_pages,
 	.truncate = i915_ttm_truncate,
-	.shrinker_release_pages = i915_ttm_shrinker_release_pages,
+	.shrink = i915_ttm_shrink,
 
 	.adjust_lru = i915_ttm_adjust_lru,
 	.delayed_free = i915_ttm_delayed_free,
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
index e130c820ae4efeb68319fa4acbba29c96ebacdbc..1ebe6e4086a1d4023e96b7dd1e58a5df7822e380 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
@@ -142,7 +142,16 @@ int i915_ttm_move_notify(struct ttm_buffer_object *bo)
 	struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
 	int ret;
 
-	ret = i915_gem_object_unbind(obj, I915_GEM_OBJECT_UNBIND_ACTIVE);
+	/*
+	 * Note: The async unbinding here will actually transform the
+	 * blocking wait for unbind into a wait before finally submitting
+	 * the evict / migration blit and thus stall the migration timeline,
+	 * which may not be good for overall throughput. We should make
+	 * sure we await the unbind fences *after* the migration blit
+	 * instead of *before* it, as we currently do.
+	 */
+	ret = i915_gem_object_unbind(obj, I915_GEM_OBJECT_UNBIND_ACTIVE |
+				     I915_GEM_OBJECT_UNBIND_ASYNC);
 	if (ret)
 		return ret;
 
@@ -531,7 +540,7 @@ int i915_ttm_move(struct ttm_buffer_object *bo, bool evict,
 			return ret;
 		}
 
-		migration_fence = __i915_ttm_move(bo, ctx, clear, dst_mem, bo->ttm,
+		migration_fence = __i915_ttm_move(bo, ctx, clear, dst_mem, ttm,
 						  dst_rsgt, true, &deps);
 		i915_deps_fini(&deps);
 	}
diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
index 11f0aa65f8a319ee69f23dd93fba8380a29ffcba..8424ee8c5eb8e700c6689d21c9d42c7814f41bf4 100644
--- a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
+++ b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
@@ -8,9 +8,10 @@
 
 #include "i915_selftest.h"
 
-#include "gem/i915_gem_region.h"
+#include "gem/i915_gem_internal.h"
 #include "gem/i915_gem_lmem.h"
 #include "gem/i915_gem_pm.h"
+#include "gem/i915_gem_region.h"
 
 #include "gt/intel_gt.h"
 
@@ -370,9 +371,9 @@ static int igt_check_page_sizes(struct i915_vma *vma)
 		err = -EINVAL;
 	}
 
-	if (!HAS_PAGE_SIZES(i915, vma->page_sizes.gtt)) {
+	if (!HAS_PAGE_SIZES(i915, vma->resource->page_sizes_gtt)) {
 		pr_err("unsupported page_sizes.gtt=%u, supported=%u\n",
-		       vma->page_sizes.gtt & ~supported, supported);
+		       vma->resource->page_sizes_gtt & ~supported, supported);
 		err = -EINVAL;
 	}
 
@@ -403,15 +404,9 @@ static int igt_check_page_sizes(struct i915_vma *vma)
 	if (i915_gem_object_is_lmem(obj) &&
 	    IS_ALIGNED(vma->node.start, SZ_2M) &&
 	    vma->page_sizes.sg & SZ_2M &&
-	    vma->page_sizes.gtt < SZ_2M) {
+	    vma->resource->page_sizes_gtt < SZ_2M) {
 		pr_err("gtt pages mismatch for LMEM, expected 2M GTT pages, sg(%u), gtt(%u)\n",
-		       vma->page_sizes.sg, vma->page_sizes.gtt);
-		err = -EINVAL;
-	}
-
-	if (obj->mm.page_sizes.gtt) {
-		pr_err("obj->page_sizes.gtt(%u) should never be set\n",
-		       obj->mm.page_sizes.gtt);
+		       vma->page_sizes.sg, vma->resource->page_sizes_gtt);
 		err = -EINVAL;
 	}
 
@@ -547,9 +542,9 @@ static int igt_mock_memory_region_huge_pages(void *arg)
 				goto out_unpin;
 			}
 
-			if (vma->page_sizes.gtt != page_size) {
+			if (vma->resource->page_sizes_gtt != page_size) {
 				pr_err("%s page_sizes.gtt=%u, expected=%u\n",
-				       __func__, vma->page_sizes.gtt,
+				       __func__, vma->resource->page_sizes_gtt,
 				       page_size);
 				err = -EINVAL;
 				goto out_unpin;
@@ -630,9 +625,9 @@ static int igt_mock_ppgtt_misaligned_dma(void *arg)
 
 		err = igt_check_page_sizes(vma);
 
-		if (vma->page_sizes.gtt != page_size) {
+		if (vma->resource->page_sizes_gtt != page_size) {
 			pr_err("page_sizes.gtt=%u, expected %u\n",
-			       vma->page_sizes.gtt, page_size);
+			       vma->resource->page_sizes_gtt, page_size);
 			err = -EINVAL;
 		}
 
@@ -647,7 +642,7 @@ static int igt_mock_ppgtt_misaligned_dma(void *arg)
 		 * pages.
 		 */
 		for (offset = 4096; offset < page_size; offset += 4096) {
-			err = i915_vma_unbind(vma);
+			err = i915_vma_unbind_unlocked(vma);
 			if (err)
 				goto out_unpin;
 
@@ -657,9 +652,10 @@ static int igt_mock_ppgtt_misaligned_dma(void *arg)
 
 			err = igt_check_page_sizes(vma);
 
-			if (vma->page_sizes.gtt != I915_GTT_PAGE_SIZE_4K) {
+			if (vma->resource->page_sizes_gtt != I915_GTT_PAGE_SIZE_4K) {
 				pr_err("page_sizes.gtt=%u, expected %llu\n",
-				       vma->page_sizes.gtt, I915_GTT_PAGE_SIZE_4K);
+				       vma->resource->page_sizes_gtt,
+				       I915_GTT_PAGE_SIZE_4K);
 				err = -EINVAL;
 			}
 
@@ -805,9 +801,9 @@ static int igt_mock_ppgtt_huge_fill(void *arg)
 			}
 		}
 
-		if (vma->page_sizes.gtt != expected_gtt) {
+		if (vma->resource->page_sizes_gtt != expected_gtt) {
 			pr_err("gtt=%u, expected=%u, size=%zd, single=%s\n",
-			       vma->page_sizes.gtt, expected_gtt,
+			       vma->resource->page_sizes_gtt, expected_gtt,
 			       obj->base.size, yesno(!!single));
 			err = -EINVAL;
 			break;
@@ -961,10 +957,10 @@ static int igt_mock_ppgtt_64K(void *arg)
 				}
 			}
 
-			if (vma->page_sizes.gtt != expected_gtt) {
+			if (vma->resource->page_sizes_gtt != expected_gtt) {
 				pr_err("gtt=%u, expected=%u, i=%d, single=%s\n",
-				       vma->page_sizes.gtt, expected_gtt, i,
-				       yesno(!!single));
+				       vma->resource->page_sizes_gtt,
+				       expected_gtt, i, yesno(!!single));
 				err = -EINVAL;
 				goto out_vma_unpin;
 			}
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c
index 0be86ffb7c19ef78049975ca7e80141c7d805740..8f28e46e8ee504c53f9da29832bf406aebeb459e 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c
@@ -319,7 +319,7 @@ static int pin_buffer(struct i915_vma *vma, u64 addr)
 	int err;
 
 	if (drm_mm_node_allocated(&vma->node) && vma->node.start != addr) {
-		err = i915_vma_unbind(vma);
+		err = i915_vma_unbind_unlocked(vma);
 		if (err)
 			return err;
 	}
@@ -544,7 +544,7 @@ static bool has_bit17_swizzle(int sw)
 
 static bool bad_swizzling(struct drm_i915_private *i915)
 {
-	struct i915_ggtt *ggtt = &i915->ggtt;
+	struct i915_ggtt *ggtt = to_gt(i915)->ggtt;
 
 	if (i915->quirks & QUIRK_PIN_SWIZZLED_PAGES)
 		return true;
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
index 7d327ffd0464891e74c14b3e3527a66556107734..bd60d42238fb00cb2feb7f87c0ad4d9af8c24433 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
@@ -6,6 +6,7 @@
 
 #include <linux/prime_numbers.h>
 
+#include "gem/i915_gem_internal.h"
 #include "gem/i915_gem_pm.h"
 #include "gt/intel_engine_pm.h"
 #include "gt/intel_engine_regs.h"
@@ -1375,7 +1376,7 @@ static int igt_ctx_readonly(void *arg)
 		goto out_file;
 	}
 
-	vm = ctx->vm ?: &i915->ggtt.alias->vm;
+	vm = ctx->vm ?: &to_gt(i915)->ggtt->alias->vm;
 	if (!vm || !vm->has_read_only) {
 		err = 0;
 		goto out_file;
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c
index ecb691c81d1e318d5fb06d99c30f626423f5b424..d534141b2cf7cbf186cdefaacae574d16f88cb28 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c
@@ -4,8 +4,13 @@
  */
 
 #include "gt/intel_migrate.h"
+#include "gt/intel_gpu_commands.h"
 #include "gem/i915_gem_ttm_move.h"
 
+#include "i915_deps.h"
+
+#include "selftests/igt_spinner.h"
+
 static int igt_fill_check_buffer(struct drm_i915_gem_object *obj,
 				 bool fill)
 {
@@ -101,7 +106,8 @@ static int igt_same_create_migrate(void *arg)
 }
 
 static int lmem_pages_migrate_one(struct i915_gem_ww_ctx *ww,
-				  struct drm_i915_gem_object *obj)
+				  struct drm_i915_gem_object *obj,
+				  struct i915_vma *vma)
 {
 	int err;
 
@@ -109,6 +115,24 @@ static int lmem_pages_migrate_one(struct i915_gem_ww_ctx *ww,
 	if (err)
 		return err;
 
+	if (vma) {
+		err = i915_vma_pin_ww(vma, ww, obj->base.size, 0,
+				      0UL | PIN_OFFSET_FIXED |
+				      PIN_USER);
+		if (err) {
+			if (err != -EINTR && err != -ERESTARTSYS &&
+			    err != -EDEADLK)
+				pr_err("Failed to pin vma.\n");
+			return err;
+		}
+
+		i915_vma_unpin(vma);
+	}
+
+	/*
+	 * Migration will implicitly unbind (asynchronously) any bound
+	 * vmas.
+	 */
 	if (i915_gem_object_is_lmem(obj)) {
 		err = i915_gem_object_migrate(obj, ww, INTEL_REGION_SMEM);
 		if (err) {
@@ -149,11 +173,15 @@ static int lmem_pages_migrate_one(struct i915_gem_ww_ctx *ww,
 	return err;
 }
 
-static int igt_lmem_pages_migrate(void *arg)
+static int __igt_lmem_pages_migrate(struct intel_gt *gt,
+				    struct i915_address_space *vm,
+				    struct i915_deps *deps,
+				    struct igt_spinner *spin,
+				    struct dma_fence *spin_fence)
 {
-	struct intel_gt *gt = arg;
 	struct drm_i915_private *i915 = gt->i915;
 	struct drm_i915_gem_object *obj;
+	struct i915_vma *vma = NULL;
 	struct i915_gem_ww_ctx ww;
 	struct i915_request *rq;
 	int err;
@@ -165,6 +193,14 @@ static int igt_lmem_pages_migrate(void *arg)
 	if (IS_ERR(obj))
 		return PTR_ERR(obj);
 
+	if (vm) {
+		vma = i915_vma_instance(obj, vm, NULL);
+		if (IS_ERR(vma)) {
+			err = PTR_ERR(vma);
+			goto out_put;
+		}
+	}
+
 	/* Initial GPU fill, sync, CPU initialization. */
 	for_i915_gem_ww(&ww, err, true) {
 		err = i915_gem_object_lock(obj, &ww);
@@ -175,25 +211,23 @@ static int igt_lmem_pages_migrate(void *arg)
 		if (err)
 			continue;
 
-		err = intel_migrate_clear(&gt->migrate, &ww, NULL,
+		err = intel_migrate_clear(&gt->migrate, &ww, deps,
 					  obj->mm.pages->sgl, obj->cache_level,
 					  i915_gem_object_is_lmem(obj),
 					  0xdeadbeaf, &rq);
 		if (rq) {
 			dma_resv_add_excl_fence(obj->base.resv, &rq->fence);
+			i915_gem_object_set_moving_fence(obj, &rq->fence);
 			i915_request_put(rq);
 		}
 		if (err)
 			continue;
 
-		err = i915_gem_object_wait(obj, I915_WAIT_INTERRUPTIBLE,
-					   5 * HZ);
-		if (err)
-			continue;
-
-		err = igt_fill_check_buffer(obj, true);
-		if (err)
-			continue;
+		if (!vma) {
+			err = igt_fill_check_buffer(obj, true);
+			if (err)
+				continue;
+		}
 	}
 	if (err)
 		goto out_put;
@@ -204,7 +238,7 @@ static int igt_lmem_pages_migrate(void *arg)
 	 */
 	for (i = 1; i <= 5; ++i) {
 		for_i915_gem_ww(&ww, err, true)
-			err = lmem_pages_migrate_one(&ww, obj);
+			err = lmem_pages_migrate_one(&ww, obj, vma);
 		if (err)
 			goto out_put;
 	}
@@ -213,12 +247,27 @@ static int igt_lmem_pages_migrate(void *arg)
 	if (err)
 		goto out_put;
 
+	if (spin) {
+		if (dma_fence_is_signaled(spin_fence)) {
+			pr_err("Spinner was terminated by hangcheck.\n");
+			err = -EBUSY;
+			goto out_unlock;
+		}
+		igt_spinner_end(spin);
+	}
+
 	/* Finally sync migration and check content. */
 	err = i915_gem_object_wait_migration(obj, true);
 	if (err)
 		goto out_unlock;
 
-	err = igt_fill_check_buffer(obj, false);
+	if (vma) {
+		err = i915_vma_wait_for_bind(vma);
+		if (err)
+			goto out_unlock;
+	} else {
+		err = igt_fill_check_buffer(obj, false);
+	}
 
 out_unlock:
 	i915_gem_object_unlock(obj);
@@ -231,6 +280,7 @@ static int igt_lmem_pages_migrate(void *arg)
 static int igt_lmem_pages_failsafe_migrate(void *arg)
 {
 	int fail_gpu, fail_alloc, ret;
+	struct intel_gt *gt = arg;
 
 	for (fail_gpu = 0; fail_gpu < 2; ++fail_gpu) {
 		for (fail_alloc = 0; fail_alloc < 2; ++fail_alloc) {
@@ -238,7 +288,118 @@ static int igt_lmem_pages_failsafe_migrate(void *arg)
 				fail_gpu, fail_alloc);
 			i915_ttm_migrate_set_failure_modes(fail_gpu,
 							   fail_alloc);
-			ret = igt_lmem_pages_migrate(arg);
+			ret = __igt_lmem_pages_migrate(gt, NULL, NULL, NULL, NULL);
+			if (ret)
+				goto out_err;
+		}
+	}
+
+out_err:
+	i915_ttm_migrate_set_failure_modes(false, false);
+	return ret;
+}
+
+/*
+ * This subtest checks that unbinding at migration is indeed performed
+ * asynchronously. We launch a spinner and a number of migrations that
+ * depend on the spinner having terminated. Before each migration we bind
+ * a vma, which should then be asynchronously unbound by the migration
+ * operation. If we are able to schedule migrations without blocking while
+ * the spinner is still running, those unbinds are indeed async and
+ * non-blocking.
+ *
+ * Note that each async bind operation awaits the previous migration due
+ * to the moving fence resulting from the migration.
+ */
+static int igt_async_migrate(struct intel_gt *gt)
+{
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+	struct i915_ppgtt *ppgtt;
+	struct igt_spinner spin;
+	int err;
+
+	ppgtt = i915_ppgtt_create(gt, 0);
+	if (IS_ERR(ppgtt))
+		return PTR_ERR(ppgtt);
+
+	if (igt_spinner_init(&spin, gt)) {
+		err = -ENOMEM;
+		goto out_spin;
+	}
+
+	for_each_engine(engine, gt, id) {
+		struct ttm_operation_ctx ctx = {
+			.interruptible = true
+		};
+		struct dma_fence *spin_fence;
+		struct intel_context *ce;
+		struct i915_request *rq;
+		struct i915_deps deps;
+
+		ce = intel_context_create(engine);
+		if (IS_ERR(ce)) {
+			err = PTR_ERR(ce);
+			goto out_ce;
+		}
+
+		/*
+		 * Use MI_NOOP, making the spinner non-preemptible. If there
+		 * is a code path where we fail the async operation due to
+		 * the running spinner, we will block and fail to end the
+		 * spinner, resulting in a deadlock. But with a non-
+		 * preemptible spinner, hangcheck will terminate the spinner
+		 * for us, and we will later detect that and fail the test.
+		 */
+		rq = igt_spinner_create_request(&spin, ce, MI_NOOP);
+		intel_context_put(ce);
+		if (IS_ERR(rq)) {
+			err = PTR_ERR(rq);
+			goto out_ce;
+		}
+
+		i915_deps_init(&deps, GFP_KERNEL);
+		err = i915_deps_add_dependency(&deps, &rq->fence, &ctx);
+		spin_fence = dma_fence_get(&rq->fence);
+		i915_request_add(rq);
+		if (err)
+			goto out_ce;
+
+		err = __igt_lmem_pages_migrate(gt, &ppgtt->vm, &deps, &spin,
+					       spin_fence);
+		i915_deps_fini(&deps);
+		dma_fence_put(spin_fence);
+		if (err)
+			goto out_ce;
+	}
+
+out_ce:
+	igt_spinner_fini(&spin);
+out_spin:
+	i915_vm_put(&ppgtt->vm);
+
+	return err;
+}
+
+/*
+ * Setting ASYNC_FAIL_ALLOC to 2 would simulate a memory allocation failure
+ * while arming the migration error check, blocking async migration. That
+ * would cause us to deadlock; hangcheck would then terminate the spinner,
+ * causing the test to fail.
+ */
+#define ASYNC_FAIL_ALLOC 1
+static int igt_lmem_async_migrate(void *arg)
+{
+	int fail_gpu, fail_alloc, ret;
+	struct intel_gt *gt = arg;
+
+	for (fail_gpu = 0; fail_gpu < 2; ++fail_gpu) {
+		for (fail_alloc = 0; fail_alloc < ASYNC_FAIL_ALLOC; ++fail_alloc) {
+			pr_info("Simulated failure modes: gpu: %d, alloc: %d\n",
+				fail_gpu, fail_alloc);
+			i915_ttm_migrate_set_failure_modes(fail_gpu,
+							   fail_alloc);
+			ret = igt_async_migrate(gt);
 			if (ret)
 				goto out_err;
 		}
@@ -256,6 +417,7 @@ int i915_gem_migrate_live_selftests(struct drm_i915_private *i915)
 		SUBTEST(igt_lmem_create_migrate),
 		SUBTEST(igt_same_create_migrate),
 		SUBTEST(igt_lmem_pages_failsafe_migrate),
+		SUBTEST(igt_lmem_async_migrate),
 	};
 
 	if (!HAS_LMEM(i915))
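
The structure of igt_async_migrate() above boils down to: queue several operations behind a fence that is still unsignaled, and only signal it once all the queuing calls have returned; if any queuing call blocks on the fence, the signal never comes and the test deadlocks (which hangcheck then converts into a failure). A minimal userspace sketch of that structure, using a pthread condition variable as a stand-in for the spinner fence (all names here are illustrative, not i915 API):

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t spinner_done = PTHREAD_COND_INITIALIZER;
static int signaled;

/* One "migration": waits for the spinner fence, then does its work. */
static void *migration(void *arg)
{
	pthread_mutex_lock(&lock);
	while (!signaled)
		pthread_cond_wait(&spinner_done, &lock);
	pthread_mutex_unlock(&lock);
	printf("migration %ld ran\n", (long)arg);
	return NULL;
}

int main(void)
{
	pthread_t threads[5];
	long i;

	/*
	 * Scheduling must be non-blocking: if any of these calls waited
	 * for the fence, we would never reach the signal below - the
	 * analogue of the selftest's hangcheck-detected deadlock.
	 */
	for (i = 0; i < 5; i++)
		pthread_create(&threads[i], NULL, migration, (void *)i);

	/* "End the spinner" only after everything has been queued. */
	pthread_mutex_lock(&lock);
	signaled = 1;
	pthread_cond_broadcast(&spinner_done);
	pthread_mutex_unlock(&lock);

	for (i = 0; i < 5; i++)
		pthread_join(threads[i], NULL);
	return 0;
}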
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
index c6291429b00cf1157b259ce0587149549283c017..8ae1a1530bd80c3f53fe7dba8c712c260d9ed6d4 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
@@ -6,11 +6,13 @@
 
 #include <linux/prime_numbers.h>
 
+#include "gem/i915_gem_internal.h"
+#include "gem/i915_gem_region.h"
 #include "gt/intel_engine_pm.h"
 #include "gt/intel_gpu_commands.h"
 #include "gt/intel_gt.h"
 #include "gt/intel_gt_pm.h"
-#include "gem/i915_gem_region.h"
+
 #include "huge_gem_object.h"
 #include "i915_selftest.h"
 #include "selftests/i915_random.h"
@@ -166,7 +168,9 @@ static int check_partial_mapping(struct drm_i915_gem_object *obj,
 	kunmap(p);
 
 out:
+	i915_gem_object_lock(obj, NULL);
 	__i915_vma_put(vma);
+	i915_gem_object_unlock(obj);
 	return err;
 }
 
@@ -261,7 +265,9 @@ static int check_partial_mappings(struct drm_i915_gem_object *obj,
 		if (err)
 			return err;
 
+		i915_gem_object_lock(obj, NULL);
 		__i915_vma_put(vma);
+		i915_gem_object_unlock(obj);
 
 		if (igt_timeout(end_time,
 				"%s: timed out after tiling=%d stride=%d\n",
@@ -307,7 +313,7 @@ static int igt_partial_tiling(void *arg)
 	int tiling;
 	int err;
 
-	if (!i915_ggtt_has_aperture(&i915->ggtt))
+	if (!i915_ggtt_has_aperture(to_gt(i915)->ggtt))
 		return 0;
 
 	/* We want to check the page mapping and fencing of a large object
@@ -320,7 +326,7 @@ static int igt_partial_tiling(void *arg)
 
 	obj = huge_gem_object(i915,
 			      nreal << PAGE_SHIFT,
-			      (1 + next_prime_number(i915->ggtt.vm.total >> PAGE_SHIFT)) << PAGE_SHIFT);
+			      (1 + next_prime_number(to_gt(i915)->ggtt->vm.total >> PAGE_SHIFT)) << PAGE_SHIFT);
 	if (IS_ERR(obj))
 		return PTR_ERR(obj);
 
@@ -366,10 +372,10 @@ static int igt_partial_tiling(void *arg)
 		tile.tiling = tiling;
 		switch (tiling) {
 		case I915_TILING_X:
-			tile.swizzle = i915->ggtt.bit_6_swizzle_x;
+			tile.swizzle = to_gt(i915)->ggtt->bit_6_swizzle_x;
 			break;
 		case I915_TILING_Y:
-			tile.swizzle = i915->ggtt.bit_6_swizzle_y;
+			tile.swizzle = to_gt(i915)->ggtt->bit_6_swizzle_y;
 			break;
 		}
 
@@ -440,7 +446,7 @@ static int igt_smoke_tiling(void *arg)
 	IGT_TIMEOUT(end);
 	int err;
 
-	if (!i915_ggtt_has_aperture(&i915->ggtt))
+	if (!i915_ggtt_has_aperture(to_gt(i915)->ggtt))
 		return 0;
 
 	/*
@@ -457,7 +463,7 @@ static int igt_smoke_tiling(void *arg)
 
 	obj = huge_gem_object(i915,
 			      nreal << PAGE_SHIFT,
-			      (1 + next_prime_number(i915->ggtt.vm.total >> PAGE_SHIFT)) << PAGE_SHIFT);
+			      (1 + next_prime_number(to_gt(i915)->ggtt->vm.total >> PAGE_SHIFT)) << PAGE_SHIFT);
 	if (IS_ERR(obj))
 		return PTR_ERR(obj);
 
@@ -486,10 +492,10 @@ static int igt_smoke_tiling(void *arg)
 			break;
 
 		case I915_TILING_X:
-			tile.swizzle = i915->ggtt.bit_6_swizzle_x;
+			tile.swizzle = to_gt(i915)->ggtt->bit_6_swizzle_x;
 			break;
 		case I915_TILING_Y:
-			tile.swizzle = i915->ggtt.bit_6_swizzle_y;
+			tile.swizzle = to_gt(i915)->ggtt->bit_6_swizzle_y;
 			break;
 		}
 
@@ -856,6 +862,7 @@ static int wc_check(struct drm_i915_gem_object *obj)
 
 static bool can_mmap(struct drm_i915_gem_object *obj, enum i915_mmap_type type)
 {
+	struct drm_i915_private *i915 = to_i915(obj->base.dev);
 	bool no_map;
 
 	if (obj->ops->mmap_offset)
@@ -864,7 +871,7 @@ static bool can_mmap(struct drm_i915_gem_object *obj, enum i915_mmap_type type)
 		return false;
 
 	if (type == I915_MMAP_TYPE_GTT &&
-	    !i915_ggtt_has_aperture(&to_i915(obj->base.dev)->ggtt))
+	    !i915_ggtt_has_aperture(to_gt(i915)->ggtt))
 		return false;
 
 	i915_gem_object_lock(obj, NULL);
@@ -1351,7 +1358,9 @@ static int __igt_mmap_revoke(struct drm_i915_private *i915,
 	 * for other objects. Ergo we have to revoke the previous mmap PTE
 	 * access as it no longer points to the same object.
 	 */
+	i915_gem_object_lock(obj, NULL);
 	err = i915_gem_object_unbind(obj, I915_GEM_OBJECT_UNBIND_ACTIVE);
+	i915_gem_object_unlock(obj);
 	if (err) {
 		pr_err("Failed to unbind object!\n");
 		goto out_unmap;
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_object.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_object.c
index 740ee8086a275068862827ae546130a7fc101e8c..fe0a890775e21ad545ac9a758d3d2e4b3ec60520 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_object.c
@@ -43,7 +43,7 @@ static int igt_gem_huge(void *arg)
 
 	obj = huge_gem_object(i915,
 			      nreal * PAGE_SIZE,
-			      i915->ggtt.vm.total + PAGE_SIZE);
+			      to_gt(i915)->ggtt->vm.total + PAGE_SIZE);
 	if (IS_ERR(obj))
 		return PTR_ERR(obj);
 
diff --git a/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c b/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c
index b35c1219c8521268a10b8f3bee35f6c93c75fa5c..3c55e77b0f1b00c2fffeb3323938274bbd382f7c 100644
--- a/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c
+++ b/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c
@@ -7,6 +7,7 @@
 #include "igt_gem_utils.h"
 
 #include "gem/i915_gem_context.h"
+#include "gem/i915_gem_internal.h"
 #include "gem/i915_gem_pm.h"
 #include "gt/intel_context.h"
 #include "gt/intel_gpu_commands.h"
diff --git a/drivers/gpu/drm/i915/gem/selftests/mock_context.c b/drivers/gpu/drm/i915/gem/selftests/mock_context.c
index c0a8ef3680446e9c3ee94d9a79793f5c225bbcc8..6d6082b5f31f96d27ef516c48916326701e353e1 100644
--- a/drivers/gpu/drm/i915/gem/selftests/mock_context.c
+++ b/drivers/gpu/drm/i915/gem/selftests/mock_context.c
@@ -4,6 +4,7 @@
  * Copyright © 2016 Intel Corporation
  */
 
+#include "i915_file_private.h"
 #include "mock_context.h"
 #include "selftests/mock_drm.h"
 #include "selftests/mock_gtt.h"
diff --git a/drivers/gpu/drm/i915/gt/gen6_ppgtt.c b/drivers/gpu/drm/i915/gt/gen6_ppgtt.c
index 8471758b3ef4e1930a4b8d4bfed470ef5a46c3a9..871fe7bda0e03c50851d3651daa7333d88f31b54 100644
--- a/drivers/gpu/drm/i915/gt/gen6_ppgtt.c
+++ b/drivers/gpu/drm/i915/gt/gen6_ppgtt.c
@@ -5,6 +5,8 @@
 
 #include <linux/log2.h>
 
+#include "gem/i915_gem_internal.h"
+
 #include "gen6_ppgtt.h"
 #include "i915_scatterlist.h"
 #include "i915_trace.h"
@@ -106,17 +108,17 @@ static void gen6_ppgtt_clear_range(struct i915_address_space *vm,
 }
 
 static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
-				      struct i915_vma *vma,
+				      struct i915_vma_resource *vma_res,
 				      enum i915_cache_level cache_level,
 				      u32 flags)
 {
 	struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
 	struct i915_page_directory * const pd = ppgtt->pd;
-	unsigned int first_entry = vma->node.start / I915_GTT_PAGE_SIZE;
+	unsigned int first_entry = vma_res->start / I915_GTT_PAGE_SIZE;
 	unsigned int act_pt = first_entry / GEN6_PTES;
 	unsigned int act_pte = first_entry % GEN6_PTES;
 	const u32 pte_encode = vm->pte_encode(0, cache_level, flags);
-	struct sgt_dma iter = sgt_dma(vma);
+	struct sgt_dma iter = sgt_dma(vma_res);
 	gen6_pte_t *vaddr;
 
 	GEM_BUG_ON(!pd->entry[act_pt]);
@@ -142,7 +144,7 @@ static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
 		}
 	} while (1);
 
-	vma->page_sizes.gtt = I915_GTT_PAGE_SIZE;
+	vma_res->page_sizes_gtt = I915_GTT_PAGE_SIZE;
 }
 
 static void gen6_flush_pd(struct gen6_ppgtt *ppgtt, u64 start, u64 end)
@@ -273,13 +275,13 @@ static void gen6_ppgtt_cleanup(struct i915_address_space *vm)
 
 static void pd_vma_bind(struct i915_address_space *vm,
 			struct i915_vm_pt_stash *stash,
-			struct i915_vma *vma,
+			struct i915_vma_resource *vma_res,
 			enum i915_cache_level cache_level,
 			u32 unused)
 {
 	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
-	struct gen6_ppgtt *ppgtt = vma->private;
-	u32 ggtt_offset = i915_ggtt_offset(vma) / I915_GTT_PAGE_SIZE;
+	struct gen6_ppgtt *ppgtt = vma_res->private;
+	u32 ggtt_offset = vma_res->start / I915_GTT_PAGE_SIZE;
 
 	ppgtt->pp_dir = ggtt_offset * sizeof(gen6_pte_t) << 10;
 	ppgtt->pd_addr = (gen6_pte_t __iomem *)ggtt->gsm + ggtt_offset;
@@ -287,9 +289,10 @@ static void pd_vma_bind(struct i915_address_space *vm,
 	gen6_flush_pd(ppgtt, 0, ppgtt->base.vm.total);
 }
 
-static void pd_vma_unbind(struct i915_address_space *vm, struct i915_vma *vma)
+static void pd_vma_unbind(struct i915_address_space *vm,
+			  struct i915_vma_resource *vma_res)
 {
-	struct gen6_ppgtt *ppgtt = vma->private;
+	struct gen6_ppgtt *ppgtt = vma_res->private;
 	struct i915_page_directory * const pd = ppgtt->base.pd;
 	struct i915_page_table *pt;
 	unsigned int pde;
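
The signature changes above are the heart of this series: bind_vma/unbind_vma and insert_entries now take a struct i915_vma_resource, a self-contained snapshot of the binding, instead of the struct i915_vma itself, so an unbind can complete asynchronously after the vma (and even its object) is gone. A rough self-contained sketch of the idea, with field names taken from what these hunks touch (a toy model, not the kernel struct):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Simplified stand-in for the backing-store info the PTE writers read. */
struct bind_info {
	const void *pages;	/* a scatterlist in the real driver */
	bool readonly;		/* was i915_gem_object_is_readonly(obj) */
	bool lmem;		/* was i915_gem_object_is_lmem(obj) */
};

/* Toy vma resource: everything the bind path needs, nothing it doesn't. */
struct vma_resource {
	uint64_t start;		/* GTT offset, was vma->node.start */
	uint64_t node_size;	/* was vma->node.size */
	struct bind_info bi;
	unsigned int page_sizes_gtt;	/* filled in by the inserter */
	unsigned int bound_flags;
};

/*
 * The PTE writer no longer dereferences the vma or its object at all,
 * so the binding can be set up or torn down independently of their
 * lifetimes.
 */
static void toy_insert_entries(struct vma_resource *vma_res)
{
	printf("binding %llu bytes at 0x%llx (%s, %s)\n",
	       (unsigned long long)vma_res->node_size,
	       (unsigned long long)vma_res->start,
	       vma_res->bi.readonly ? "ro" : "rw",
	       vma_res->bi.lmem ? "lmem" : "smem");
	vma_res->page_sizes_gtt = 4096;
}

int main(void)
{
	struct vma_resource res = {
		.start = 0x100000, .node_size = 0x4000,
		.bi = { .pages = NULL, .readonly = false, .lmem = true },
	};

	toy_insert_entries(&res);
	return 0;
}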
diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
index b012c50f7ce7871c655af9e4e9051867f94371dc..c43e724afa9f67311694a765f1e1c5a967202af8 100644
--- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
+++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
@@ -453,20 +453,21 @@ gen8_ppgtt_insert_pte(struct i915_ppgtt *ppgtt,
 	return idx;
 }
 
-static void gen8_ppgtt_insert_huge(struct i915_vma *vma,
+static void gen8_ppgtt_insert_huge(struct i915_address_space *vm,
+				   struct i915_vma_resource *vma_res,
 				   struct sgt_dma *iter,
 				   enum i915_cache_level cache_level,
 				   u32 flags)
 {
 	const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level, flags);
 	unsigned int rem = sg_dma_len(iter->sg);
-	u64 start = vma->node.start;
+	u64 start = vma_res->start;
 
-	GEM_BUG_ON(!i915_vm_is_4lvl(vma->vm));
+	GEM_BUG_ON(!i915_vm_is_4lvl(vm));
 
 	do {
 		struct i915_page_directory * const pdp =
-			gen8_pdp_for_page_address(vma->vm, start);
+			gen8_pdp_for_page_address(vm, start);
 		struct i915_page_directory * const pd =
 			i915_pd_entry(pdp, __gen8_pte_index(start, 2));
 		gen8_pte_t encode = pte_encode;
@@ -475,7 +476,7 @@ static void gen8_ppgtt_insert_huge(struct i915_vma *vma,
 		gen8_pte_t *vaddr;
 		u16 index;
 
-		if (vma->page_sizes.sg & I915_GTT_PAGE_SIZE_2M &&
+		if (vma_res->bi.page_sizes.sg & I915_GTT_PAGE_SIZE_2M &&
 		    IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_2M) &&
 		    rem >= I915_GTT_PAGE_SIZE_2M &&
 		    !__gen8_pte_index(start, 0)) {
@@ -492,7 +493,7 @@ static void gen8_ppgtt_insert_huge(struct i915_vma *vma,
 			page_size = I915_GTT_PAGE_SIZE;
 
 			if (!index &&
-			    vma->page_sizes.sg & I915_GTT_PAGE_SIZE_64K &&
+			    vma_res->bi.page_sizes.sg & I915_GTT_PAGE_SIZE_64K &&
 			    IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_64K) &&
 			    (IS_ALIGNED(rem, I915_GTT_PAGE_SIZE_64K) ||
 			     rem >= (I915_PDES - index) * I915_GTT_PAGE_SIZE))
@@ -541,9 +542,9 @@ static void gen8_ppgtt_insert_huge(struct i915_vma *vma,
 		 */
 		if (maybe_64K != -1 &&
 		    (index == I915_PDES ||
-		     (i915_vm_has_scratch_64K(vma->vm) &&
-		      !iter->sg && IS_ALIGNED(vma->node.start +
-					      vma->node.size,
+		     (i915_vm_has_scratch_64K(vm) &&
+		      !iter->sg && IS_ALIGNED(vma_res->start +
+					      vma_res->node_size,
 					      I915_GTT_PAGE_SIZE_2M)))) {
 			vaddr = px_vaddr(pd);
 			vaddr[maybe_64K] |= GEN8_PDE_IPS_64K;
@@ -559,10 +560,10 @@ static void gen8_ppgtt_insert_huge(struct i915_vma *vma,
 			 * instead - which we detect as missing results during
 			 * selftests.
 			 */
-			if (I915_SELFTEST_ONLY(vma->vm->scrub_64K)) {
+			if (I915_SELFTEST_ONLY(vm->scrub_64K)) {
 				u16 i;
 
-				encode = vma->vm->scratch[0]->encode;
+				encode = vm->scratch[0]->encode;
 				vaddr = px_vaddr(i915_pt_entry(pd, maybe_64K));
 
 				for (i = 1; i < index; i += 16)
@@ -572,22 +573,22 @@ static void gen8_ppgtt_insert_huge(struct i915_vma *vma,
 			}
 		}
 
-		vma->page_sizes.gtt |= page_size;
+		vma_res->page_sizes_gtt |= page_size;
 	} while (iter->sg && sg_dma_len(iter->sg));
 }
 
 static void gen8_ppgtt_insert(struct i915_address_space *vm,
-			      struct i915_vma *vma,
+			      struct i915_vma_resource *vma_res,
 			      enum i915_cache_level cache_level,
 			      u32 flags)
 {
 	struct i915_ppgtt * const ppgtt = i915_vm_to_ppgtt(vm);
-	struct sgt_dma iter = sgt_dma(vma);
+	struct sgt_dma iter = sgt_dma(vma_res);
 
-	if (vma->page_sizes.sg > I915_GTT_PAGE_SIZE) {
-		gen8_ppgtt_insert_huge(vma, &iter, cache_level, flags);
+	if (vma_res->bi.page_sizes.sg > I915_GTT_PAGE_SIZE) {
+		gen8_ppgtt_insert_huge(vm, vma_res, &iter, cache_level, flags);
 	} else  {
-		u64 idx = vma->node.start >> GEN8_PTE_SHIFT;
+		u64 idx = vma_res->start >> GEN8_PTE_SHIFT;
 
 		do {
 			struct i915_page_directory * const pdp =
@@ -597,7 +598,7 @@ static void gen8_ppgtt_insert(struct i915_address_space *vm,
 						    cache_level, flags);
 		} while (idx);
 
-		vma->page_sizes.gtt = I915_GTT_PAGE_SIZE;
+		vma_res->page_sizes_gtt = I915_GTT_PAGE_SIZE;
 	}
 }
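
The page-size selection in gen8_ppgtt_insert_huge() above is the one place in these hunks with real logic rather than renames. A minimal userspace sketch of that decision, mirroring the conditions visible in the hunk (the PDE/PTE programming is omitted and the helpers are simplified stand-ins):

#include <stdint.h>
#include <stdio.h>

#define SZ_4K	(1ull << 12)
#define SZ_64K	(1ull << 16)
#define SZ_2M	(1ull << 21)
#define PDES	512		/* PTEs per page table */

#define IS_ALIGNED(x, a)	(((x) & ((a) - 1)) == 0)

/*
 * Pick the GTT page size for the next chunk, mimicking the checks in
 * gen8_ppgtt_insert_huge(): 2M needs sg support, a 2M-aligned dma
 * address, at least 2M remaining and a 2M-aligned GTT offset; 64K
 * additionally requires starting at PTE index 0 and the remainder to
 * either be 64K-aligned or to fill the rest of the page table.
 */
static uint64_t pick_page_size(unsigned int sg_sizes, uint64_t dma,
			       uint64_t rem, uint64_t start)
{
	unsigned int pte_index = (start >> 12) & (PDES - 1);

	if ((sg_sizes & SZ_2M) && IS_ALIGNED(dma, SZ_2M) &&
	    rem >= SZ_2M && IS_ALIGNED(start, SZ_2M))
		return SZ_2M;

	if (pte_index == 0 && (sg_sizes & SZ_64K) &&
	    IS_ALIGNED(dma, SZ_64K) &&
	    (IS_ALIGNED(rem, SZ_64K) || rem >= (PDES - pte_index) * SZ_4K))
		return SZ_64K;

	return SZ_4K;
}

int main(void)
{
	printf("0x%llx\n", (unsigned long long)
	       pick_page_size(SZ_2M | SZ_64K, 0x200000, SZ_2M, 0x400000));
	printf("0x%llx\n", (unsigned long long)
	       pick_page_size(SZ_64K, 0x10000, 4 * SZ_64K, 0x400000));
	return 0;
}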
 
diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c
index ba083d800a0894f1b5faafd0c3acc341cbf24c8f..5d0ec7c49b6a5dbe358cf4ca593cf9e964ab1fd6 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.c
+++ b/drivers/gpu/drm/i915/gt/intel_context.c
@@ -79,7 +79,8 @@ static int intel_context_active_acquire(struct intel_context *ce)
 
 	__i915_active_acquire(&ce->active);
 
-	if (intel_context_is_barrier(ce) || intel_engine_uses_guc(ce->engine))
+	if (intel_context_is_barrier(ce) || intel_engine_uses_guc(ce->engine) ||
+	    intel_context_is_parallel(ce))
 		return 0;
 
 	/* Preallocate tracking nodes */
@@ -563,7 +564,6 @@ void intel_context_bind_parent_child(struct intel_context *parent,
 	 * Callers responsibility to validate that this function is used
 	 * correctly but we use GEM_BUG_ON here ensure that they do.
 	 */
-	GEM_BUG_ON(!intel_engine_uses_guc(parent->engine));
 	GEM_BUG_ON(intel_context_is_pinned(parent));
 	GEM_BUG_ON(intel_context_is_child(parent));
 	GEM_BUG_ON(intel_context_is_pinned(child));
diff --git a/drivers/gpu/drm/i915/gt/intel_context_sseu.c b/drivers/gpu/drm/i915/gt/intel_context_sseu.c
index e86d8255feec4177de31b404014d479d74bf1781..ece16c2b5b8ee02674553e8e0bb1fa9b6a16273b 100644
--- a/drivers/gpu/drm/i915/gt/intel_context_sseu.c
+++ b/drivers/gpu/drm/i915/gt/intel_context_sseu.c
@@ -9,6 +9,7 @@
 #include "intel_engine_pm.h"
 #include "intel_gpu_commands.h"
 #include "intel_lrc.h"
+#include "intel_lrc_reg.h"
 #include "intel_ring.h"
 #include "intel_sseu.h"
 
diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h
index 0e353d8c2bc8ac52eb5da67b61b6125fa1f8a0c0..be4b1e65442f520b4a50051d58d030612be80cfb 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine.h
@@ -182,6 +182,8 @@ intel_write_status_page(struct intel_engine_cs *engine, int reg, u32 value)
 #define I915_HWS_CSB_BUF0_INDEX		0x10
 #define I915_HWS_CSB_WRITE_INDEX	0x1f
 #define ICL_HWS_CSB_WRITE_INDEX		0x2f
+#define INTEL_HWS_CSB_WRITE_INDEX(__i915) \
+	(GRAPHICS_VER(__i915) >= 11 ? ICL_HWS_CSB_WRITE_INDEX : I915_HWS_CSB_WRITE_INDEX)
 
 void intel_engine_stop(struct intel_engine_cs *engine);
 void intel_engine_cleanup(struct intel_engine_cs *engine);
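
INTEL_HWS_CSB_WRITE_INDEX() folds the Gen11+ CSB write-pointer offset split into a single version check so that callers (see the intel_execlists_submission.c hunk below) can drop the old helper. A small stand-alone illustration, with GRAPHICS_VER() faked as an identity macro for the demo:

#include <stdio.h>

#define I915_HWS_CSB_WRITE_INDEX	0x1f
#define ICL_HWS_CSB_WRITE_INDEX		0x2f

/* Stand-in: the real GRAPHICS_VER() reads the device info at runtime. */
#define GRAPHICS_VER(ver)	(ver)

#define INTEL_HWS_CSB_WRITE_INDEX(__i915) \
	(GRAPHICS_VER(__i915) >= 11 ? ICL_HWS_CSB_WRITE_INDEX : \
				      I915_HWS_CSB_WRITE_INDEX)

int main(void)
{
	printf("gen9:  0x%x\n", INTEL_HWS_CSB_WRITE_INDEX(9));	/* 0x1f */
	printf("gen12: 0x%x\n", INTEL_HWS_CSB_WRITE_INDEX(12));	/* 0x2f */
	return 0;
}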
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index 84ad09fb9b8b8427d8763ddcf82364e3255ea426..e53008b4dd0528556f7f0b82f160f006c67aa66a 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -6,6 +6,7 @@
 #include <drm/drm_print.h>
 
 #include "gem/i915_gem_context.h"
+#include "gem/i915_gem_internal.h"
 #include "gt/intel_gt_regs.h"
 
 #include "i915_cmd_parser.h"
@@ -1229,17 +1230,6 @@ void intel_engine_cancel_stop_cs(struct intel_engine_cs *engine)
 	ENGINE_WRITE_FW(engine, RING_MI_MODE, _MASKED_BIT_DISABLE(STOP_RING));
 }
 
-const char *i915_cache_level_str(struct drm_i915_private *i915, int type)
-{
-	switch (type) {
-	case I915_CACHE_NONE: return " uncached";
-	case I915_CACHE_LLC: return HAS_LLC(i915) ? " LLC" : " snooped";
-	case I915_CACHE_L3_LLC: return " L3+LLC";
-	case I915_CACHE_WT: return " WT";
-	default: return "";
-	}
-}
-
 static u32
 read_subslice_reg(const struct intel_engine_cs *engine,
 		  int slice, int subslice, i915_reg_t reg)
@@ -1710,18 +1700,15 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine,
 
 static void print_request_ring(struct drm_printer *m, struct i915_request *rq)
 {
-	struct i915_vma_snapshot *vsnap = &rq->batch_snapshot;
+	struct i915_vma_resource *vma_res = rq->batch_res;
 	void *ring;
 	int size;
 
-	if (!i915_vma_snapshot_present(vsnap))
-		vsnap = NULL;
-
 	drm_printf(m,
 		   "[head %04x, postfix %04x, tail %04x, batch 0x%08x_%08x]:\n",
 		   rq->head, rq->postfix, rq->tail,
-		   vsnap ? upper_32_bits(vsnap->gtt_offset) : ~0u,
-		   vsnap ? lower_32_bits(vsnap->gtt_offset) : ~0u);
+		   vma_res ? upper_32_bits(vma_res->start) : ~0u,
+		   vma_res ? lower_32_bits(vma_res->start) : ~0u);
 
 	size = rq->tail - rq->head;
 	if (rq->tail < rq->head)
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_regs.h b/drivers/gpu/drm/i915/gt/intel_engine_regs.h
index e9fec6214073e69949838bd9be741b98e2d04c4b..0bf8b45c93194b61ca481f67f59ffdb0f7d034ac 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_regs.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_regs.h
@@ -70,6 +70,12 @@
 #define RING_NOPID(base)			_MMIO((base) + 0x94)
 #define RING_HWSTAM(base)			_MMIO((base) + 0x98)
 #define RING_MI_MODE(base)			_MMIO((base) + 0x9c)
+#define   ASYNC_FLIP_PERF_DISABLE		REG_BIT(14)
+#define   MI_FLUSH_ENABLE			REG_BIT(12)
+#define   TGL_NESTED_BB_EN			REG_BIT(12)
+#define   MODE_IDLE				REG_BIT(9)
+#define   STOP_RING				REG_BIT(8)
+#define   VS_TIMER_DISPATCH			REG_BIT(6)
 #define RING_IMR(base)				_MMIO((base) + 0xa8)
 #define RING_EIR(base)				_MMIO((base) + 0xb0)
 #define RING_EMR(base)				_MMIO((base) + 0xb4)
@@ -211,8 +217,25 @@
 #define GEN8_RING_CS_GPR(base, n)		_MMIO((base) + 0x600 + (n) * 8)
 #define GEN8_RING_CS_GPR_UDW(base, n)		_MMIO((base) + 0x600 + (n) * 8 + 4)
 
+#define GEN11_VCS_SFC_FORCED_LOCK(base)		_MMIO((base) + 0x88c)
+#define   GEN11_VCS_SFC_FORCED_LOCK_BIT		(1 << 0)
+#define GEN11_VCS_SFC_LOCK_STATUS(base)		_MMIO((base) + 0x890)
+#define   GEN11_VCS_SFC_USAGE_BIT		(1 << 0)
+#define   GEN11_VCS_SFC_LOCK_ACK_BIT		(1 << 1)
+
+#define GEN11_VECS_SFC_FORCED_LOCK(base)	_MMIO((base) + 0x201c)
+#define   GEN11_VECS_SFC_FORCED_LOCK_BIT	(1 << 0)
+#define GEN11_VECS_SFC_LOCK_ACK(base)		_MMIO((base) + 0x2018)
+#define   GEN11_VECS_SFC_LOCK_ACK_BIT		(1 << 0)
+#define GEN11_VECS_SFC_USAGE(base)		_MMIO((base) + 0x2014)
+#define   GEN11_VECS_SFC_USAGE_BIT		(1 << 0)
+
 #define RING_HWS_PGA_GEN6(base)	_MMIO((base) + 0x2080)
 
+#define GEN12_HCP_SFC_LOCK_STATUS(base)		_MMIO((base) + 0x2914)
+#define   GEN12_HCP_SFC_LOCK_ACK_BIT		REG_BIT(1)
+#define   GEN12_HCP_SFC_USAGE_BIT		REG_BIT(0)
+
 #define VDBOX_CGCTL3F10(base)			_MMIO((base) + 0x3f10)
 #define   IECPUNIT_CLKGATE_DIS			REG_BIT(22)
 
diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
index 49e26d23ce1f4d066d230e33a65d93542d802ef8..961d795220a308f9304c269f79e2d8f0ec8fe8b4 100644
--- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
@@ -2601,6 +2601,43 @@ static void execlists_context_cancel_request(struct intel_context *ce,
 				      current->comm);
 }
 
+static struct intel_context *
+execlists_create_parallel(struct intel_engine_cs **engines,
+			  unsigned int num_siblings,
+			  unsigned int width)
+{
+	struct intel_context *parent = NULL, *ce, *err;
+	int i;
+
+	GEM_BUG_ON(num_siblings != 1);
+
+	for (i = 0; i < width; ++i) {
+		ce = intel_context_create(engines[i]);
+		if (IS_ERR(ce)) {
+			err = ce;
+			goto unwind;
+		}
+
+		if (i == 0)
+			parent = ce;
+		else
+			intel_context_bind_parent_child(parent, ce);
+	}
+
+	parent->parallel.fence_context = dma_fence_context_alloc(1);
+
+	intel_context_set_nopreempt(parent);
+	for_each_child(parent, ce)
+		intel_context_set_nopreempt(ce);
+
+	return parent;
+
+unwind:
+	if (parent)
+		intel_context_put(parent);
+	return err;
+}
+
 static const struct intel_context_ops execlists_context_ops = {
 	.flags = COPS_HAS_INFLIGHT,
 
@@ -2619,6 +2656,7 @@ static const struct intel_context_ops execlists_context_ops = {
 	.reset = lrc_reset,
 	.destroy = lrc_destroy,
 
+	.create_parallel = execlists_create_parallel,
 	.create_virtual = execlists_create_virtual,
 };
 
@@ -3465,7 +3503,7 @@ int intel_execlists_submission_setup(struct intel_engine_cs *engine)
 		(u64 *)&engine->status_page.addr[I915_HWS_CSB_BUF0_INDEX];
 
 	execlists->csb_write =
-		&engine->status_page.addr[intel_hws_csb_write_index(i915)];
+		&engine->status_page.addr[INTEL_HWS_CSB_WRITE_INDEX(i915)];
 
 	if (GRAPHICS_VER(i915) < 11)
 		execlists->csb_size = GEN8_CSB_ENTRIES;
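
execlists_create_parallel() above follows the usual build-then-unwind idiom: the first context created becomes the parent, subsequent ones are bound as children, and on failure putting the parent is expected to release the whole family. The same idiom in a self-contained sketch (toy types and a free-based put; not the intel_context API):

#include <stdio.h>
#include <stdlib.h>

struct ctx {
	struct ctx *child;	/* singly-linked family, parent first */
};

/* Putting the parent releases the entire family. */
static void ctx_put(struct ctx *parent)
{
	while (parent) {
		struct ctx *next = parent->child;

		free(parent);
		parent = next;
	}
}

static struct ctx *create_parallel(unsigned int width)
{
	struct ctx *parent = NULL, *tail = NULL;
	unsigned int i;

	for (i = 0; i < width; i++) {
		struct ctx *ce = calloc(1, sizeof(*ce));

		if (!ce)
			goto unwind;	/* dropping the parent frees all */

		if (i == 0)
			parent = ce;
		else
			tail->child = ce;	/* bind as child */
		tail = ce;
	}
	return parent;

unwind:
	ctx_put(parent);
	return NULL;
}

int main(void)
{
	struct ctx *family = create_parallel(3);

	printf("created family: %s\n", family ? "ok" : "failed");
	ctx_put(family);
	return 0;
}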
diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c
index d2922f64d1c856a7c4e288cb127daabe89c9956e..8850d4e0f9cca147debd728e2ffd0d0608f3df74 100644
--- a/drivers/gpu/drm/i915/gt/intel_ggtt.c
+++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c
@@ -87,7 +87,7 @@ int i915_ggtt_init_hw(struct drm_i915_private *i915)
 	 * beyond the end of the batch buffer, across the page boundary,
 	 * and beyond the end of the GTT if we do not provide a guard.
 	 */
-	ret = ggtt_init_hw(&i915->ggtt);
+	ret = ggtt_init_hw(to_gt(i915)->ggtt);
 	if (ret)
 		return ret;
 
@@ -130,22 +130,51 @@ void i915_ggtt_suspend_vm(struct i915_address_space *vm)
 
 	drm_WARN_ON(&vm->i915->drm, !vm->is_ggtt && !vm->is_dpt);
 
+retry:
+	i915_gem_drain_freed_objects(vm->i915);
+
 	mutex_lock(&vm->mutex);
 
 	/* Skip rewriting PTE on VMA unbind. */
 	open = atomic_xchg(&vm->open, 0);
 
 	list_for_each_entry_safe(vma, vn, &vm->bound_list, vm_link) {
+		struct drm_i915_gem_object *obj = vma->obj;
+
 		GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
-		i915_vma_wait_for_bind(vma);
 
-		if (i915_vma_is_pinned(vma))
+		if (i915_vma_is_pinned(vma) || !i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND))
 			continue;
 
+		/* unlikely to race when GPU is idle, so no worry about slowpath.. */
+		if (WARN_ON(!i915_gem_object_trylock(obj, NULL))) {
+			/*
+			 * No dead objects should appear here, GPU should be
+			 * completely idle, and userspace suspended
+			 */
+			i915_gem_object_get(obj);
+
+			atomic_set(&vm->open, open);
+			mutex_unlock(&vm->mutex);
+
+			i915_gem_object_lock(obj, NULL);
+			open = i915_vma_unbind(vma);
+			i915_gem_object_unlock(obj);
+
+			GEM_WARN_ON(open);
+
+			i915_gem_object_put(obj);
+			goto retry;
+		}
+
 		if (!i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND)) {
-			__i915_vma_evict(vma);
+			i915_vma_wait_for_bind(vma);
+
+			__i915_vma_evict(vma, false);
 			drm_mm_remove_node(&vma->node);
 		}
+
+		i915_gem_object_unlock(obj);
 	}
 
 	vm->clear_range(vm, 0, vm->total);
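
The trylock-and-restart dance added to i915_ggtt_suspend_vm() above is a standard way around lock-order inversion: with vm->mutex already held, the object lock may only be tried; on contention everything is dropped, the locks are taken in the proper order for the slow path, and the whole walk restarts. A self-contained pthread model of the same control flow (single-threaded, so the trylock succeeds on the retry; the point is the restart structure, and the names are illustrative):

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t vm_mutex = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t obj_lock = PTHREAD_MUTEX_INITIALIZER;

static void suspend_walk(void)
{
retry:
	pthread_mutex_lock(&vm_mutex);

	/* The inner lock must not block while the outer lock is held. */
	if (pthread_mutex_trylock(&obj_lock) != 0) {
		/*
		 * Contended: drop the outer lock, take the locks in the
		 * correct order, do the slow-path work, then restart.
		 */
		pthread_mutex_unlock(&vm_mutex);

		pthread_mutex_lock(&obj_lock);
		/* ... the real code unbinds the vma here ... */
		pthread_mutex_unlock(&obj_lock);
		goto retry;
	}

	/* Fast path: both locks held, evict under them. */
	pthread_mutex_unlock(&obj_lock);
	pthread_mutex_unlock(&vm_mutex);
}

int main(void)
{
	suspend_walk();
	puts("walk complete");
	return 0;
}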
@@ -236,7 +265,7 @@ static void gen8_ggtt_insert_page(struct i915_address_space *vm,
 }
 
 static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
-				     struct i915_vma *vma,
+				     struct i915_vma_resource *vma_res,
 				     enum i915_cache_level level,
 				     u32 flags)
 {
@@ -253,10 +282,10 @@ static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
 	 */
 
 	gte = (gen8_pte_t __iomem *)ggtt->gsm;
-	gte += vma->node.start / I915_GTT_PAGE_SIZE;
-	end = gte + vma->node.size / I915_GTT_PAGE_SIZE;
+	gte += vma_res->start / I915_GTT_PAGE_SIZE;
+	end = gte + vma_res->node_size / I915_GTT_PAGE_SIZE;
 
-	for_each_sgt_daddr(addr, iter, vma->pages)
+	for_each_sgt_daddr(addr, iter, vma_res->bi.pages)
 		gen8_set_pte(gte++, pte_encode | addr);
 	GEM_BUG_ON(gte > end);
 
@@ -293,7 +322,7 @@ static void gen6_ggtt_insert_page(struct i915_address_space *vm,
  * through the GMADR mapped BAR (i915->mm.gtt->gtt).
  */
 static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
-				     struct i915_vma *vma,
+				     struct i915_vma_resource *vma_res,
 				     enum i915_cache_level level,
 				     u32 flags)
 {
@@ -304,10 +333,10 @@ static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
 	dma_addr_t addr;
 
 	gte = (gen6_pte_t __iomem *)ggtt->gsm;
-	gte += vma->node.start / I915_GTT_PAGE_SIZE;
-	end = gte + vma->node.size / I915_GTT_PAGE_SIZE;
+	gte += vma_res->start / I915_GTT_PAGE_SIZE;
+	end = gte + vma_res->node_size / I915_GTT_PAGE_SIZE;
 
-	for_each_sgt_daddr(addr, iter, vma->pages)
+	for_each_sgt_daddr(addr, iter, vma_res->bi.pages)
 		iowrite32(vm->pte_encode(addr, level, flags), gte++);
 	GEM_BUG_ON(gte > end);
 
@@ -390,7 +419,7 @@ static void bxt_vtd_ggtt_insert_page__BKL(struct i915_address_space *vm,
 
 struct insert_entries {
 	struct i915_address_space *vm;
-	struct i915_vma *vma;
+	struct i915_vma_resource *vma_res;
 	enum i915_cache_level level;
 	u32 flags;
 };
@@ -399,18 +428,18 @@ static int bxt_vtd_ggtt_insert_entries__cb(void *_arg)
 {
 	struct insert_entries *arg = _arg;
 
-	gen8_ggtt_insert_entries(arg->vm, arg->vma, arg->level, arg->flags);
+	gen8_ggtt_insert_entries(arg->vm, arg->vma_res, arg->level, arg->flags);
 	bxt_vtd_ggtt_wa(arg->vm);
 
 	return 0;
 }
 
 static void bxt_vtd_ggtt_insert_entries__BKL(struct i915_address_space *vm,
-					     struct i915_vma *vma,
+					     struct i915_vma_resource *vma_res,
 					     enum i915_cache_level level,
 					     u32 flags)
 {
-	struct insert_entries arg = { vm, vma, level, flags };
+	struct insert_entries arg = { vm, vma_res, level, flags };
 
 	stop_machine(bxt_vtd_ggtt_insert_entries__cb, &arg, NULL);
 }
@@ -449,14 +478,14 @@ static void i915_ggtt_insert_page(struct i915_address_space *vm,
 }
 
 static void i915_ggtt_insert_entries(struct i915_address_space *vm,
-				     struct i915_vma *vma,
+				     struct i915_vma_resource *vma_res,
 				     enum i915_cache_level cache_level,
 				     u32 unused)
 {
 	unsigned int flags = (cache_level == I915_CACHE_NONE) ?
 		AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;
 
-	intel_gtt_insert_sg_entries(vma->pages, vma->node.start >> PAGE_SHIFT,
+	intel_gtt_insert_sg_entries(vma_res->bi.pages, vma_res->start >> PAGE_SHIFT,
 				    flags);
 }
 
@@ -468,30 +497,32 @@ static void i915_ggtt_clear_range(struct i915_address_space *vm,
 
 static void ggtt_bind_vma(struct i915_address_space *vm,
 			  struct i915_vm_pt_stash *stash,
-			  struct i915_vma *vma,
+			  struct i915_vma_resource *vma_res,
 			  enum i915_cache_level cache_level,
 			  u32 flags)
 {
-	struct drm_i915_gem_object *obj = vma->obj;
 	u32 pte_flags;
 
-	if (i915_vma_is_bound(vma, ~flags & I915_VMA_BIND_MASK))
+	if (vma_res->bound_flags & (~flags & I915_VMA_BIND_MASK))
 		return;
 
+	vma_res->bound_flags |= flags;
+
 	/* Applicable to VLV (gen8+ do not support RO in the GGTT) */
 	pte_flags = 0;
-	if (i915_gem_object_is_readonly(obj))
+	if (vma_res->bi.readonly)
 		pte_flags |= PTE_READ_ONLY;
-	if (i915_gem_object_is_lmem(obj))
+	if (vma_res->bi.lmem)
 		pte_flags |= PTE_LM;
 
-	vm->insert_entries(vm, vma, cache_level, pte_flags);
-	vma->page_sizes.gtt = I915_GTT_PAGE_SIZE;
+	vm->insert_entries(vm, vma_res, cache_level, pte_flags);
+	vma_res->page_sizes_gtt = I915_GTT_PAGE_SIZE;
 }
 
-static void ggtt_unbind_vma(struct i915_address_space *vm, struct i915_vma *vma)
+static void ggtt_unbind_vma(struct i915_address_space *vm,
+			    struct i915_vma_resource *vma_res)
 {
-	vm->clear_range(vm, vma->node.start, vma->size);
+	vm->clear_range(vm, vma_res->start, vma_res->vma_size);
 }
 
 static int ggtt_reserve_guc_top(struct i915_ggtt *ggtt)
@@ -505,7 +536,7 @@ static int ggtt_reserve_guc_top(struct i915_ggtt *ggtt)
 	GEM_BUG_ON(ggtt->vm.total <= GUC_GGTT_TOP);
 	size = ggtt->vm.total - GUC_GGTT_TOP;
 
-	ret = i915_gem_gtt_reserve(&ggtt->vm, &ggtt->uc_fw, size,
+	ret = i915_gem_gtt_reserve(&ggtt->vm, NULL, &ggtt->uc_fw, size,
 				   GUC_GGTT_TOP, I915_COLOR_UNEVICTABLE,
 				   PIN_NOEVICT);
 	if (ret)
@@ -624,7 +655,7 @@ static int init_ggtt(struct i915_ggtt *ggtt)
 
 static void aliasing_gtt_bind_vma(struct i915_address_space *vm,
 				  struct i915_vm_pt_stash *stash,
-				  struct i915_vma *vma,
+				  struct i915_vma_resource *vma_res,
 				  enum i915_cache_level cache_level,
 				  u32 flags)
 {
@@ -632,25 +663,27 @@ static void aliasing_gtt_bind_vma(struct i915_address_space *vm,
 
 	/* Currently applicable only to VLV */
 	pte_flags = 0;
-	if (i915_gem_object_is_readonly(vma->obj))
+	if (vma_res->bi.readonly)
 		pte_flags |= PTE_READ_ONLY;
 
 	if (flags & I915_VMA_LOCAL_BIND)
 		ppgtt_bind_vma(&i915_vm_to_ggtt(vm)->alias->vm,
-			       stash, vma, cache_level, flags);
+			       stash, vma_res, cache_level, flags);
 
 	if (flags & I915_VMA_GLOBAL_BIND)
-		vm->insert_entries(vm, vma, cache_level, pte_flags);
+		vm->insert_entries(vm, vma_res, cache_level, pte_flags);
+
+	vma_res->bound_flags |= flags;
 }
 
 static void aliasing_gtt_unbind_vma(struct i915_address_space *vm,
-				    struct i915_vma *vma)
+				    struct i915_vma_resource *vma_res)
 {
-	if (i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND))
-		vm->clear_range(vm, vma->node.start, vma->size);
+	if (vma_res->bound_flags & I915_VMA_GLOBAL_BIND)
+		vm->clear_range(vm, vma_res->start, vma_res->vma_size);
 
-	if (i915_vma_is_bound(vma, I915_VMA_LOCAL_BIND))
-		ppgtt_unbind_vma(&i915_vm_to_ggtt(vm)->alias->vm, vma);
+	if (vma_res->bound_flags & I915_VMA_LOCAL_BIND)
+		ppgtt_unbind_vma(&i915_vm_to_ggtt(vm)->alias->vm, vma_res);
 }
 
 static int init_aliasing_ppgtt(struct i915_ggtt *ggtt)
@@ -723,14 +756,14 @@ int i915_init_ggtt(struct drm_i915_private *i915)
 {
 	int ret;
 
-	ret = init_ggtt(&i915->ggtt);
+	ret = init_ggtt(to_gt(i915)->ggtt);
 	if (ret)
 		return ret;
 
 	if (INTEL_PPGTT(i915) == INTEL_PPGTT_ALIASING) {
-		ret = init_aliasing_ppgtt(&i915->ggtt);
+		ret = init_aliasing_ppgtt(to_gt(i915)->ggtt);
 		if (ret)
-			cleanup_init_ggtt(&i915->ggtt);
+			cleanup_init_ggtt(to_gt(i915)->ggtt);
 	}
 
 	return 0;
@@ -743,11 +776,21 @@ static void ggtt_cleanup_hw(struct i915_ggtt *ggtt)
 	atomic_set(&ggtt->vm.open, 0);
 
 	flush_workqueue(ggtt->vm.i915->wq);
+	i915_gem_drain_freed_objects(ggtt->vm.i915);
 
 	mutex_lock(&ggtt->vm.mutex);
 
-	list_for_each_entry_safe(vma, vn, &ggtt->vm.bound_list, vm_link)
+	list_for_each_entry_safe(vma, vn, &ggtt->vm.bound_list, vm_link) {
+		struct drm_i915_gem_object *obj = vma->obj;
+		bool trylock;
+
+		trylock = i915_gem_object_trylock(obj, NULL);
+		WARN_ON(!trylock);
+
 		WARN_ON(__i915_vma_unbind(vma));
+		if (trylock)
+			i915_gem_object_unlock(obj);
+	}
 
 	if (drm_mm_node_allocated(&ggtt->error_capture))
 		drm_mm_remove_node(&ggtt->error_capture);
@@ -773,7 +816,7 @@ static void ggtt_cleanup_hw(struct i915_ggtt *ggtt)
  */
 void i915_ggtt_driver_release(struct drm_i915_private *i915)
 {
-	struct i915_ggtt *ggtt = &i915->ggtt;
+	struct i915_ggtt *ggtt = to_gt(i915)->ggtt;
 
 	fini_aliasing_ppgtt(ggtt);
 
@@ -788,7 +831,7 @@ void i915_ggtt_driver_release(struct drm_i915_private *i915)
  */
 void i915_ggtt_driver_late_release(struct drm_i915_private *i915)
 {
-	struct i915_ggtt *ggtt = &i915->ggtt;
+	struct i915_ggtt *ggtt = to_gt(i915)->ggtt;
 
 	GEM_WARN_ON(kref_read(&ggtt->vm.resv_ref) != 1);
 	dma_resv_fini(&ggtt->vm._resv);
@@ -1209,7 +1252,7 @@ int i915_ggtt_probe_hw(struct drm_i915_private *i915)
 {
 	int ret;
 
-	ret = ggtt_probe_hw(&i915->ggtt, to_gt(i915));
+	ret = ggtt_probe_hw(to_gt(i915)->ggtt, to_gt(i915));
 	if (ret)
 		return ret;
 
@@ -1281,7 +1324,7 @@ bool i915_ggtt_resume_vm(struct i915_address_space *vm)
 			atomic_read(&vma->flags) & I915_VMA_BIND_MASK;
 
 		GEM_BUG_ON(!was_bound);
-		vma->ops->bind_vma(vm, NULL, vma,
+		vma->ops->bind_vma(vm, NULL, vma->resource,
 				   obj ? obj->cache_level : 0,
 				   was_bound);
 		if (obj) { /* only used during resume => exclusive access */
diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c b/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c
index 95c9145232f5ab8bc09c3bf75b2424450d7abb0c..76880fb8fc197cfbce199dfb0727bbdbd8e06397 100644
--- a/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c
+++ b/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c
@@ -9,6 +9,7 @@
 #include "i915_pvinfo.h"
 #include "i915_vgpu.h"
 #include "intel_gt_regs.h"
+#include "intel_mchbar_regs.h"
 
 /**
  * DOC: fence register handling
@@ -427,7 +428,6 @@ int i915_vma_pin_fence(struct i915_vma *vma)
 	 * must keep the device awake whilst using the fence.
 	 */
 	assert_rpm_wakelock_held(vma->vm->gt->uncore->rpm);
-	GEM_BUG_ON(!i915_vma_is_pinned(vma));
 	GEM_BUG_ON(!i915_vma_is_ggtt(vma));
 
 	err = mutex_lock_interruptible(&vma->vm->mutex);
@@ -730,8 +730,8 @@ static void detect_bit_6_swizzle(struct i915_ggtt *ggtt)
 		swizzle_y = I915_BIT_6_SWIZZLE_NONE;
 	}
 
-	i915->ggtt.bit_6_swizzle_x = swizzle_x;
-	i915->ggtt.bit_6_swizzle_y = swizzle_y;
+	to_gt(i915)->ggtt->bit_6_swizzle_x = swizzle_x;
+	to_gt(i915)->ggtt->bit_6_swizzle_y = swizzle_y;
 }
 
 /*
@@ -898,7 +898,7 @@ void intel_gt_init_swizzling(struct intel_gt *gt)
 	struct intel_uncore *uncore = gt->uncore;
 
 	if (GRAPHICS_VER(i915) < 5 ||
-	    i915->ggtt.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_NONE)
+	    to_gt(i915)->ggtt->bit_6_swizzle_x == I915_BIT_6_SWIZZLE_NONE)
 		return;
 
 	intel_uncore_rmw(uncore, DISP_ARB_CTL, 0, DISP_TILE_SURFACE_SWIZZLING);
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c
index 57763aede976a5c546a9b2291b0100f6112870f2..e8403fa539097f376939bf85f459ade540c2a7e0 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt.c
@@ -3,17 +3,20 @@
  * Copyright © 2019 Intel Corporation
  */
 
+#include <drm/drm_managed.h>
 #include <drm/intel-gtt.h>
 
-#include "intel_gt_debugfs.h"
-
+#include "gem/i915_gem_internal.h"
 #include "gem/i915_gem_lmem.h"
+#include "pxp/intel_pxp.h"
+
 #include "i915_drv.h"
 #include "intel_context.h"
 #include "intel_engine_regs.h"
 #include "intel_gt.h"
 #include "intel_gt_buffer_pool.h"
 #include "intel_gt_clock_utils.h"
+#include "intel_gt_debugfs.h"
 #include "intel_gt_pm.h"
 #include "intel_gt_regs.h"
 #include "intel_gt_requests.h"
@@ -25,7 +28,6 @@
 #include "intel_rps.h"
 #include "intel_uncore.h"
 #include "shmem_utils.h"
-#include "pxp/intel_pxp.h"
 
 void __intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915)
 {
@@ -89,9 +91,11 @@ int intel_gt_probe_lmem(struct intel_gt *gt)
 	return 0;
 }
 
-void intel_gt_init_hw_early(struct intel_gt *gt, struct i915_ggtt *ggtt)
+int intel_gt_assign_ggtt(struct intel_gt *gt)
 {
-	gt->ggtt = ggtt;
+	gt->ggtt = drmm_kzalloc(&gt->i915->drm, sizeof(*gt->ggtt), GFP_KERNEL);
+
+	return gt->ggtt ? 0 : -ENOMEM;
 }
 
 static const struct intel_mmio_range icl_l3bank_steering_table[] = {
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.h b/drivers/gpu/drm/i915/gt/intel_gt.h
index a913fb6ffec39941d585b7dc325f3177085e95f4..2dad46c3eff21d8995710d099649179400dfd5fd 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt.h
@@ -36,7 +36,7 @@ static inline struct intel_gt *huc_to_gt(struct intel_huc *huc)
 
 void intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915);
 void __intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915);
-void intel_gt_init_hw_early(struct intel_gt *gt, struct i915_ggtt *ggtt);
+int intel_gt_assign_ggtt(struct intel_gt *gt);
 int intel_gt_probe_lmem(struct intel_gt *gt);
 int intel_gt_init_mmio(struct intel_gt *gt);
 int __must_check intel_gt_init_hw(struct intel_gt *gt);
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c b/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c
index 9db3dcbd917f7f5a4be97db96d474c69d892a45a..cadfd85785b12870430c16fedcd7d5ddc0e8f71b 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c
@@ -3,6 +3,7 @@
  * Copyright © 2014-2018 Intel Corporation
  */
 
+#include "gem/i915_gem_internal.h"
 #include "gem/i915_gem_object.h"
 
 #include "i915_drv.h"
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c b/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c
index 4e448c13a64c879cdfce60063de106f406ddf382..37765919fe322c98f6f36e97963818dda22edcd4 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c
@@ -15,6 +15,7 @@
 #include "intel_gt_pm_debugfs.h"
 #include "intel_gt_regs.h"
 #include "intel_llc.h"
+#include "intel_mchbar_regs.h"
 #include "intel_pcode.h"
 #include "intel_rc6.h"
 #include "intel_rps.h"
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
index 16d98ebee687af4466192bf7628b7e2fe19d9ae4..18d158d77aba699db91f84ddf0f753cb4f199af9 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
@@ -8,779 +8,166 @@
 
 #include "i915_reg_defs.h"
 
-#define ILK_GDSR _MMIO(MCHBAR_MIRROR_BASE + 0x2ca4)
-#define  ILK_GRDOM_FULL		(0 << 1)
-#define  ILK_GRDOM_RENDER	(1 << 1)
-#define  ILK_GRDOM_MEDIA	(3 << 1)
-#define  ILK_GRDOM_MASK		(3 << 1)
-#define  ILK_GRDOM_RESET_ENABLE (1 << 0)
-
-#define GEN6_MBCUNIT_SNPCR	_MMIO(0x900c) /* for LLC config */
-#define   GEN6_MBC_SNPCR_SHIFT	21
-#define   GEN6_MBC_SNPCR_MASK	(3 << 21)
-#define   GEN6_MBC_SNPCR_MAX	(0 << 21)
-#define   GEN6_MBC_SNPCR_MED	(1 << 21)
-#define   GEN6_MBC_SNPCR_LOW	(2 << 21)
-#define   GEN6_MBC_SNPCR_MIN	(3 << 21) /* only 1/16th of the cache is shared */
-
-#define VLV_G3DCTL		_MMIO(0x9024)
-#define VLV_GSCKGCTL		_MMIO(0x9028)
-
-#define FBC_LLC_READ_CTRL	_MMIO(0x9044)
-#define   FBC_LLC_FULLY_OPEN	REG_BIT(30)
-
-#define GEN6_MBCTL		_MMIO(0x0907c)
-#define   GEN6_MBCTL_ENABLE_BOOT_FETCH	(1 << 4)
-#define   GEN6_MBCTL_CTX_FETCH_NEEDED	(1 << 3)
-#define   GEN6_MBCTL_BME_UPDATE_ENABLE	(1 << 2)
-#define   GEN6_MBCTL_MAE_UPDATE_ENABLE	(1 << 1)
-#define   GEN6_MBCTL_BOOT_FETCH_MECH	(1 << 0)
-
-#define GEN6_GDRST	_MMIO(0x941c)
-#define  GEN6_GRDOM_FULL		(1 << 0)
-#define  GEN6_GRDOM_RENDER		(1 << 1)
-#define  GEN6_GRDOM_MEDIA		(1 << 2)
-#define  GEN6_GRDOM_BLT			(1 << 3)
-#define  GEN6_GRDOM_VECS		(1 << 4)
-#define  GEN9_GRDOM_GUC			(1 << 5)
-#define  GEN8_GRDOM_MEDIA2		(1 << 7)
-/* GEN11 changed all bit defs except for FULL & RENDER */
-#define  GEN11_GRDOM_FULL		GEN6_GRDOM_FULL
-#define  GEN11_GRDOM_RENDER		GEN6_GRDOM_RENDER
-#define  GEN11_GRDOM_BLT		(1 << 2)
-#define  GEN11_GRDOM_GUC		(1 << 3)
-#define  GEN11_GRDOM_MEDIA		(1 << 5)
-#define  GEN11_GRDOM_MEDIA2		(1 << 6)
-#define  GEN11_GRDOM_MEDIA3		(1 << 7)
-#define  GEN11_GRDOM_MEDIA4		(1 << 8)
-#define  GEN11_GRDOM_MEDIA5		(1 << 9)
-#define  GEN11_GRDOM_MEDIA6		(1 << 10)
-#define  GEN11_GRDOM_MEDIA7		(1 << 11)
-#define  GEN11_GRDOM_MEDIA8		(1 << 12)
-#define  GEN11_GRDOM_VECS		(1 << 13)
-#define  GEN11_GRDOM_VECS2		(1 << 14)
-#define  GEN11_GRDOM_VECS3		(1 << 15)
-#define  GEN11_GRDOM_VECS4		(1 << 16)
-#define  GEN11_GRDOM_SFC0		(1 << 17)
-#define  GEN11_GRDOM_SFC1		(1 << 18)
-#define  GEN11_GRDOM_SFC2		(1 << 19)
-#define  GEN11_GRDOM_SFC3		(1 << 20)
-#define  GEN11_VCS_SFC_RESET_BIT(instance)	(GEN11_GRDOM_SFC0 << ((instance) >> 1))
-#define  GEN11_VECS_SFC_RESET_BIT(instance)	(GEN11_GRDOM_SFC0 << (instance))
-
-#define GEN11_VCS_SFC_FORCED_LOCK(engine)	_MMIO((engine)->mmio_base + 0x88C)
-#define   GEN11_VCS_SFC_FORCED_LOCK_BIT		(1 << 0)
-#define GEN11_VCS_SFC_LOCK_STATUS(engine)	_MMIO((engine)->mmio_base + 0x890)
-#define   GEN11_VCS_SFC_USAGE_BIT		(1 << 0)
-#define   GEN11_VCS_SFC_LOCK_ACK_BIT		(1 << 1)
-
-#define GEN11_VECS_SFC_FORCED_LOCK(engine)	_MMIO((engine)->mmio_base + 0x201C)
-#define   GEN11_VECS_SFC_FORCED_LOCK_BIT	(1 << 0)
-#define GEN11_VECS_SFC_LOCK_ACK(engine)		_MMIO((engine)->mmio_base + 0x2018)
-#define   GEN11_VECS_SFC_LOCK_ACK_BIT		(1 << 0)
-#define GEN11_VECS_SFC_USAGE(engine)		_MMIO((engine)->mmio_base + 0x2014)
-#define   GEN11_VECS_SFC_USAGE_BIT		(1 << 0)
-
-#define GEN12_HCP_SFC_FORCED_LOCK(engine)	_MMIO((engine)->mmio_base + 0x2910)
-#define   GEN12_HCP_SFC_FORCED_LOCK_BIT		REG_BIT(0)
-#define GEN12_HCP_SFC_LOCK_STATUS(engine)	_MMIO((engine)->mmio_base + 0x2914)
-#define   GEN12_HCP_SFC_LOCK_ACK_BIT		REG_BIT(1)
-#define   GEN12_HCP_SFC_USAGE_BIT			REG_BIT(0)
-
-#define GEN12_SFC_DONE(n)		_MMIO(0x1cc000 + (n) * 0x1000)
-
-#define WAIT_FOR_RC6_EXIT		_MMIO(0x20CC)
-/* HSW only */
-#define   HSW_SELECTIVE_READ_ADDRESSING_SHIFT		2
-#define   HSW_SELECTIVE_READ_ADDRESSING_MASK		(0x3 << HSW_SLECTIVE_READ_ADDRESSING_SHIFT)
-#define   HSW_SELECTIVE_WRITE_ADDRESS_SHIFT		4
-#define   HSW_SELECTIVE_WRITE_ADDRESS_MASK		(0x7 << HSW_SELECTIVE_WRITE_ADDRESS_SHIFT)
-/* HSW+ */
-#define   HSW_WAIT_FOR_RC6_EXIT_ENABLE			(1 << 0)
-#define   HSW_RCS_CONTEXT_ENABLE			(1 << 7)
-#define   HSW_RCS_INHIBIT				(1 << 8)
-/* Gen8 */
-#define   GEN8_SELECTIVE_WRITE_ADDRESS_SHIFT		4
-#define   GEN8_SELECTIVE_WRITE_ADDRESS_MASK		(0x3 << GEN8_SELECTIVE_WRITE_ADDRESS_SHIFT)
-#define   GEN8_SELECTIVE_WRITE_ADDRESS_SHIFT		4
-#define   GEN8_SELECTIVE_WRITE_ADDRESS_MASK		(0x3 << GEN8_SELECTIVE_WRITE_ADDRESS_SHIFT)
-#define   GEN8_SELECTIVE_WRITE_ADDRESSING_ENABLE	(1 << 6)
-#define   GEN8_SELECTIVE_READ_SUBSLICE_SELECT_SHIFT	9
-#define   GEN8_SELECTIVE_READ_SUBSLICE_SELECT_MASK	(0x3 << GEN8_SELECTIVE_READ_SUBSLICE_SELECT_SHIFT)
-#define   GEN8_SELECTIVE_READ_SLICE_SELECT_SHIFT	11
-#define   GEN8_SELECTIVE_READ_SLICE_SELECT_MASK		(0x3 << GEN8_SELECTIVE_READ_SLICE_SELECT_SHIFT)
-#define   GEN8_SELECTIVE_READ_ADDRESSING_ENABLE         (1 << 13)
-
-#define GAM_ECOCHK			_MMIO(0x4090)
-#define   BDW_DISABLE_HDC_INVALIDATION	(1 << 25)
-#define   ECOCHK_SNB_BIT		(1 << 10)
-#define   ECOCHK_DIS_TLB		(1 << 8)
-#define   HSW_ECOCHK_ARB_PRIO_SOL	(1 << 6)
-#define   ECOCHK_PPGTT_CACHE64B		(0x3 << 3)
-#define   ECOCHK_PPGTT_CACHE4B		(0x0 << 3)
-#define   ECOCHK_PPGTT_GFDT_IVB		(0x1 << 4)
-#define   ECOCHK_PPGTT_LLC_IVB		(0x1 << 3)
-#define   ECOCHK_PPGTT_UC_HSW		(0x1 << 3)
-#define   ECOCHK_PPGTT_WT_HSW		(0x2 << 3)
-#define   ECOCHK_PPGTT_WB_HSW		(0x3 << 3)
-
-#define GEN8_RC6_CTX_INFO		_MMIO(0x8504)
-
-#define GAC_ECO_BITS			_MMIO(0x14090)
-#define   ECOBITS_SNB_BIT		(1 << 13)
-#define   ECOBITS_PPGTT_CACHE64B	(3 << 8)
-#define   ECOBITS_PPGTT_CACHE4B		(0 << 8)
-
-#define GEN12_GAMCNTRL_CTRL			_MMIO(0xcf54)
-#define   INVALIDATION_BROADCAST_MODE_DIS	REG_BIT(12)
-#define   GLOBAL_INVALIDATION_MODE		REG_BIT(2)
-
-#define GEN12_GAMSTLB_CTRL		_MMIO(0xcf4c)
-#define   CONTROL_BLOCK_CLKGATE_DIS	REG_BIT(12)
-#define   EGRESS_BLOCK_CLKGATE_DIS	REG_BIT(11)
-#define   TAG_BLOCK_CLKGATE_DIS		REG_BIT(7)
-
-#define GEN12_MERT_MOD_CTRL		_MMIO(0xcf28)
-#define RENDER_MOD_CTRL			_MMIO(0xcf2c)
-#define COMP_MOD_CTRL			_MMIO(0xcf30)
-#define VDBX_MOD_CTRL			_MMIO(0xcf34)
-#define VEBX_MOD_CTRL			_MMIO(0xcf38)
-#define   FORCE_MISS_FTLB		REG_BIT(3)
-
-#define GAB_CTL				_MMIO(0x24000)
-#define   GAB_CTL_CONT_AFTER_PAGEFAULT	(1 << 8)
-
-#define HSW_MI_PREDICATE_RESULT_2	_MMIO(0x2214)
-
-/*
- * Registers used only by the command parser
- */
-#define BCS_SWCTRL _MMIO(0x22200)
-#define   BCS_SRC_Y REG_BIT(0)
-#define   BCS_DST_Y REG_BIT(1)
-
-#define GPGPU_THREADS_DISPATCHED        _MMIO(0x2290)
-#define GPGPU_THREADS_DISPATCHED_UDW	_MMIO(0x2290 + 4)
-#define HS_INVOCATION_COUNT             _MMIO(0x2300)
-#define HS_INVOCATION_COUNT_UDW		_MMIO(0x2300 + 4)
-#define DS_INVOCATION_COUNT             _MMIO(0x2308)
-#define DS_INVOCATION_COUNT_UDW		_MMIO(0x2308 + 4)
-#define IA_VERTICES_COUNT               _MMIO(0x2310)
-#define IA_VERTICES_COUNT_UDW		_MMIO(0x2310 + 4)
-#define IA_PRIMITIVES_COUNT             _MMIO(0x2318)
-#define IA_PRIMITIVES_COUNT_UDW		_MMIO(0x2318 + 4)
-#define VS_INVOCATION_COUNT             _MMIO(0x2320)
-#define VS_INVOCATION_COUNT_UDW		_MMIO(0x2320 + 4)
-#define GS_INVOCATION_COUNT             _MMIO(0x2328)
-#define GS_INVOCATION_COUNT_UDW		_MMIO(0x2328 + 4)
-#define GS_PRIMITIVES_COUNT             _MMIO(0x2330)
-#define GS_PRIMITIVES_COUNT_UDW		_MMIO(0x2330 + 4)
-#define CL_INVOCATION_COUNT             _MMIO(0x2338)
-#define CL_INVOCATION_COUNT_UDW		_MMIO(0x2338 + 4)
-#define CL_PRIMITIVES_COUNT             _MMIO(0x2340)
-#define CL_PRIMITIVES_COUNT_UDW		_MMIO(0x2340 + 4)
-#define PS_INVOCATION_COUNT             _MMIO(0x2348)
-#define PS_INVOCATION_COUNT_UDW		_MMIO(0x2348 + 4)
-#define PS_DEPTH_COUNT                  _MMIO(0x2350)
-#define PS_DEPTH_COUNT_UDW		_MMIO(0x2350 + 4)
-
-/* There are the 4 64-bit counter registers, one for each stream output */
-#define GEN7_SO_NUM_PRIMS_WRITTEN(n)		_MMIO(0x5200 + (n) * 8)
-#define GEN7_SO_NUM_PRIMS_WRITTEN_UDW(n)	_MMIO(0x5200 + (n) * 8 + 4)
-
-#define GEN7_SO_PRIM_STORAGE_NEEDED(n)		_MMIO(0x5240 + (n) * 8)
-#define GEN7_SO_PRIM_STORAGE_NEEDED_UDW(n)	_MMIO(0x5240 + (n) * 8 + 4)
-
-#define GEN7_3DPRIM_END_OFFSET          _MMIO(0x2420)
-#define GEN7_3DPRIM_START_VERTEX        _MMIO(0x2430)
-#define GEN7_3DPRIM_VERTEX_COUNT        _MMIO(0x2434)
-#define GEN7_3DPRIM_INSTANCE_COUNT      _MMIO(0x2438)
-#define GEN7_3DPRIM_START_INSTANCE      _MMIO(0x243C)
-#define GEN7_3DPRIM_BASE_VERTEX         _MMIO(0x2440)
-
-#define GEN7_GPGPU_DISPATCHDIMX         _MMIO(0x2500)
-#define GEN7_GPGPU_DISPATCHDIMY         _MMIO(0x2504)
-#define GEN7_GPGPU_DISPATCHDIMZ         _MMIO(0x2508)
-
-#define GEN12_SQCM		_MMIO(0x8724)
-#define   EN_32B_ACCESS		REG_BIT(30)
-
-/*
- * Flexible, Aggregate EU Counter Registers.
- * Note: these aren't contiguous
- */
-#define EU_PERF_CNTL0	    _MMIO(0xe458)
-#define EU_PERF_CNTL1	    _MMIO(0xe558)
-#define EU_PERF_CNTL2	    _MMIO(0xe658)
-#define EU_PERF_CNTL3	    _MMIO(0xe758)
-#define EU_PERF_CNTL4	    _MMIO(0xe45c)
-#define EU_PERF_CNTL5	    _MMIO(0xe55c)
-#define EU_PERF_CNTL6	    _MMIO(0xe65c)
-
-#define RT_CTRL			_MMIO(0xe530)
-#define  DIS_NULL_QUERY		REG_BIT(10)
-
 /* RPM unit config (Gen8+) */
-#define RPM_CONFIG0	    _MMIO(0x0D00)
-#define  GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_SHIFT	3
-#define  GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_MASK	(1 << GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_SHIFT)
-#define  GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_19_2_MHZ	0
-#define  GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_24_MHZ	1
-#define  GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_SHIFT	3
-#define  GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_MASK	(0x7 << GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_SHIFT)
-#define  GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_24_MHZ	0
-#define  GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_19_2_MHZ	1
-#define  GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_38_4_MHZ	2
-#define  GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_25_MHZ	3
-#define  GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_SHIFT	1
-#define  GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK	(0x3 << GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_SHIFT)
-
-#define RPM_CONFIG1	    _MMIO(0x0D04)
-#define  GEN10_GT_NOA_ENABLE  (1 << 9)
-
-/* GPM unit config (Gen9+) */
-#define CTC_MODE			_MMIO(0xA26C)
-#define  CTC_SOURCE_PARAMETER_MASK 1
-#define  CTC_SOURCE_CRYSTAL_CLOCK	0
-#define  CTC_SOURCE_DIVIDE_LOGIC	1
-#define  CTC_SHIFT_PARAMETER_SHIFT	1
-#define  CTC_SHIFT_PARAMETER_MASK	(0x3 << CTC_SHIFT_PARAMETER_SHIFT)
+#define RPM_CONFIG0				_MMIO(0xd00)
+#define   GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_SHIFT	3
+#define   GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_MASK	(1 << GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_SHIFT)
+#define   GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_19_2_MHZ	0
+#define   GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_24_MHZ	1
+#define   GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_SHIFT	3
+#define   GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_MASK	(0x7 << GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_SHIFT)
+#define   GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_24_MHZ	0
+#define   GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_19_2_MHZ	1
+#define   GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_38_4_MHZ	2
+#define   GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_25_MHZ	3
+#define   GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_SHIFT	1
+#define   GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK	(0x3 << GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_SHIFT)
+
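+/*
+ * A minimal decode sketch (illustrative only, not part of this patch;
+ * "uncore" stands in for the caller's struct intel_uncore):
+ *
+ *	u32 c0 = intel_uncore_read(uncore, RPM_CONFIG0);
+ *	u32 freq = (c0 & GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_MASK) >>
+ *		   GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_SHIFT;
+ *
+ * where freq then selects 24, 19.2, 38.4 or 25 MHz per the values above.
+ */
+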
+#define RPM_CONFIG1				_MMIO(0xd04)
+#define   GEN10_GT_NOA_ENABLE			(1 << 9)
 
 /* RCP unit config (Gen8+) */
-#define RCP_CONFIG	    _MMIO(0x0D08)
-
-#define MICRO_BP0_0	    _MMIO(0x9800)
-#define MICRO_BP0_2	    _MMIO(0x9804)
-#define MICRO_BP0_1	    _MMIO(0x9808)
-
-#define MICRO_BP1_0	    _MMIO(0x980C)
-#define MICRO_BP1_2	    _MMIO(0x9810)
-#define MICRO_BP1_1	    _MMIO(0x9814)
-
-#define MICRO_BP2_0	    _MMIO(0x9818)
-#define MICRO_BP2_2	    _MMIO(0x981C)
-#define MICRO_BP2_1	    _MMIO(0x9820)
-
-#define MICRO_BP3_0	    _MMIO(0x9824)
-#define MICRO_BP3_2	    _MMIO(0x9828)
-#define MICRO_BP3_1	    _MMIO(0x982C)
-
-#define MICRO_BP_TRIGGER		_MMIO(0x9830)
-#define MICRO_BP3_COUNT_STATUS01	_MMIO(0x9834)
-#define MICRO_BP3_COUNT_STATUS23	_MMIO(0x9838)
-#define MICRO_BP_FIRED_ARMED		_MMIO(0x983C)
-
-#define GAMTARBMODE		_MMIO(0x04a08)
-#define   ARB_MODE_BWGTLB_DISABLE (1 << 9)
-#define   ARB_MODE_SWIZZLE_BDW	(1 << 1)
-#define RENDER_HWS_PGA_GEN7	_MMIO(0x04080)
-
-#define _RING_FAULT_REG_RCS        0x4094
-#define _RING_FAULT_REG_VCS        0x4194
-#define _RING_FAULT_REG_BCS        0x4294
-#define _RING_FAULT_REG_VECS       0x4394
-#define RING_FAULT_REG(engine)     _MMIO(_PICK((engine)->class, \
-					       _RING_FAULT_REG_RCS, \
-					       _RING_FAULT_REG_VCS, \
-					       _RING_FAULT_REG_VECS, \
-					       _RING_FAULT_REG_BCS))
-#define GEN8_RING_FAULT_REG	_MMIO(0x4094)
-#define GEN12_RING_FAULT_REG	_MMIO(0xcec4)
-#define   GEN8_RING_FAULT_ENGINE_ID(x)	(((x) >> 12) & 0x7)
-#define   RING_FAULT_GTTSEL_MASK (1 << 11)
-#define   RING_FAULT_SRCID(x)	(((x) >> 3) & 0xff)
-#define   RING_FAULT_FAULT_TYPE(x) (((x) >> 1) & 0x3)
-#define   RING_FAULT_VALID	(1 << 0)
-#define DONE_REG		_MMIO(0x40b0)
-#define GEN12_GAM_DONE		_MMIO(0xcf68)
-#define GEN8_PRIVATE_PAT_LO	_MMIO(0x40e0)
-#define GEN8_PRIVATE_PAT_HI	_MMIO(0x40e0 + 4)
-#define GEN10_PAT_INDEX(index)	_MMIO(0x40e0 + (index) * 4)
-#define GEN12_PAT_INDEX(index)	_MMIO(0x4800 + (index) * 4)
-#define BSD_HWS_PGA_GEN7	_MMIO(0x04180)
-#define GEN12_GFX_CCS_AUX_NV	_MMIO(0x4208)
-#define GEN12_VD0_AUX_NV	_MMIO(0x4218)
-#define GEN12_VD1_AUX_NV	_MMIO(0x4228)
-#define GEN12_VD2_AUX_NV	_MMIO(0x4298)
-#define GEN12_VD3_AUX_NV	_MMIO(0x42A8)
-#define GEN12_VE0_AUX_NV	_MMIO(0x4238)
-#define GEN12_VE1_AUX_NV	_MMIO(0x42B8)
-#define   AUX_INV		REG_BIT(0)
-#define BLT_HWS_PGA_GEN7	_MMIO(0x04280)
-#define VEBOX_HWS_PGA_GEN7	_MMIO(0x04380)
-
-#define MISC_STATUS0		_MMIO(0xA500)
-#define MISC_STATUS1		_MMIO(0xA504)
-
-#define GEN7_TLB_RD_ADDR	_MMIO(0x4700)
-
-#define GEN9_GAMT_ECO_REG_RW_IA _MMIO(0x4ab0)
-#define   GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS	(1 << 18)
-
-#define GEN8_GAMW_ECO_DEV_RW_IA _MMIO(0x4080)
-#define   GAMW_ECO_ENABLE_64K_IPS_FIELD 0xF
-#define   GAMW_ECO_DEV_CTX_RELOAD_DISABLE	(1 << 7)
-
-#define GAMT_CHKN_BIT_REG	_MMIO(0x4ab8)
-#define   GAMT_CHKN_DISABLE_L3_COH_PIPE			(1 << 31)
-#define   GAMT_CHKN_DISABLE_DYNAMIC_CREDIT_SHARING	(1 << 28)
-#define   GAMT_CHKN_DISABLE_I2M_CYCLE_ON_WR_PORT	(1 << 24)
+#define RCP_CONFIG				_MMIO(0xd08)
+
+#define RC6_LOCATION				_MMIO(0xd40)
+#define   RC6_CTX_IN_DRAM			(1 << 0)
+#define RC6_CTX_BASE				_MMIO(0xd48)
+#define   RC6_CTX_BASE_MASK			0xFFFFFFF0
+
+#define FORCEWAKE_ACK_MEDIA_VDBOX_GEN11(n)	_MMIO(0xd50 + (n) * 4)
+#define FORCEWAKE_ACK_MEDIA_VEBOX_GEN11(n)	_MMIO(0xd70 + (n) * 4)
+#define FORCEWAKE_ACK_RENDER_GEN9		_MMIO(0xd84)
+#define FORCEWAKE_ACK_MEDIA_GEN9		_MMIO(0xd88)
+
+#define MCFG_MCR_SELECTOR			_MMIO(0xfd0)
+#define SF_MCR_SELECTOR				_MMIO(0xfd8)
+#define GEN8_MCR_SELECTOR			_MMIO(0xfdc)
+#define   GEN8_MCR_SLICE(slice)			(((slice) & 3) << 26)
+#define   GEN8_MCR_SLICE_MASK			GEN8_MCR_SLICE(3)
+#define   GEN8_MCR_SUBSLICE(subslice)		(((subslice) & 3) << 24)
+#define   GEN8_MCR_SUBSLICE_MASK		GEN8_MCR_SUBSLICE(3)
+#define   GEN11_MCR_SLICE(slice)		(((slice) & 0xf) << 27)
+#define   GEN11_MCR_SLICE_MASK			GEN11_MCR_SLICE(0xf)
+#define   GEN11_MCR_SUBSLICE(subslice)		(((subslice) & 0x7) << 24)
+#define   GEN11_MCR_SUBSLICE_MASK		GEN11_MCR_SUBSLICE(0x7)
+
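+/*
+ * Steering sketch (illustrative only): multicast register accesses are
+ * directed at one slice/subslice by programming the selector first,
+ * roughly as the driver's MCR read/write helpers do:
+ *
+ *	u32 mcr = intel_uncore_read_fw(uncore, GEN8_MCR_SELECTOR);
+ *	mcr &= ~(GEN8_MCR_SLICE_MASK | GEN8_MCR_SUBSLICE_MASK);
+ *	mcr |= GEN8_MCR_SLICE(slice) | GEN8_MCR_SUBSLICE(subslice);
+ *	intel_uncore_write_fw(uncore, GEN8_MCR_SELECTOR, mcr);
+ */
+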
+#define IPEIR_I965				_MMIO(0x2064)
+#define IPEHR_I965				_MMIO(0x2068)
 
-#define GEN8_RTCR	_MMIO(0x4260)
-#define GEN8_M1TCR	_MMIO(0x4264)
-#define GEN8_M2TCR	_MMIO(0x4268)
-#define GEN8_BTCR	_MMIO(0x426c)
-#define GEN8_VTCR	_MMIO(0x4270)
-
-#define IPEIR_I965	_MMIO(0x2064)
-#define IPEHR_I965	_MMIO(0x2068)
-#define GEN7_SC_INSTDONE	_MMIO(0x7100)
-#define GEN12_SC_INSTDONE_EXTRA		_MMIO(0x7104)
-#define GEN12_SC_INSTDONE_EXTRA2	_MMIO(0x7108)
-#define GEN7_SAMPLER_INSTDONE	_MMIO(0xe160)
-#define GEN7_ROW_INSTDONE	_MMIO(0xe164)
-#define XEHPG_INSTDONE_GEOM_SVG		_MMIO(0x666c)
-#define MCFG_MCR_SELECTOR		_MMIO(0xfd0)
-#define SF_MCR_SELECTOR			_MMIO(0xfd8)
-#define GEN8_MCR_SELECTOR		_MMIO(0xfdc)
-#define   GEN8_MCR_SLICE(slice)		(((slice) & 3) << 26)
-#define   GEN8_MCR_SLICE_MASK		GEN8_MCR_SLICE(3)
-#define   GEN8_MCR_SUBSLICE(subslice)	(((subslice) & 3) << 24)
-#define   GEN8_MCR_SUBSLICE_MASK	GEN8_MCR_SUBSLICE(3)
-#define   GEN11_MCR_SLICE(slice)	(((slice) & 0xf) << 27)
-#define   GEN11_MCR_SLICE_MASK		GEN11_MCR_SLICE(0xf)
-#define   GEN11_MCR_SUBSLICE(subslice)	(((subslice) & 0x7) << 24)
-#define   GEN11_MCR_SUBSLICE_MASK	GEN11_MCR_SUBSLICE(0x7)
 /*
  * On GEN4, only the render ring INSTDONE exists and has a different
  * layout than the GEN7+ version.
  * The GEN2 counterpart of this register is GEN2_INSTDONE.
  */
-#define INSTPS		_MMIO(0x2070) /* 965+ only */
-#define GEN4_INSTDONE1	_MMIO(0x207c) /* 965+ only, aka INSTDONE_2 on SNB */
-#define ACTHD_I965	_MMIO(0x2074)
-#define HWS_PGA		_MMIO(0x2080)
-#define HWS_ADDRESS_MASK	0xfffff000
-#define HWS_START_ADDRESS_SHIFT	4
-#define PWRCTXA		_MMIO(0x2088) /* 965GM+ only */
-#define   PWRCTX_EN	(1 << 0)
-#define GEN2_INSTDONE	_MMIO(0x2090)
-#define NOPID		_MMIO(0x2094)
-#define HWSTAM		_MMIO(0x2098)
-
-#define VDBOX_CGCTL3F18(base)		_MMIO((base) + 0x3f18)
-#define   ALNUNIT_CLKGATE_DIS		REG_BIT(13)
-
-#define ERROR_GEN6	_MMIO(0x40a0)
-
-#define GEN8_FAULT_TLB_DATA0		_MMIO(0x4b10)
-#define GEN8_FAULT_TLB_DATA1		_MMIO(0x4b14)
-#define GEN12_FAULT_TLB_DATA0		_MMIO(0xceb8)
-#define GEN12_FAULT_TLB_DATA1		_MMIO(0xcebc)
-#define   FAULT_VA_HIGH_BITS		(0xf << 0)
-#define   FAULT_GTT_SEL			(1 << 4)
-
-#define GEN12_GFX_TLB_INV_CR	_MMIO(0xced8)
-#define GEN12_VD_TLB_INV_CR	_MMIO(0xcedc)
-#define GEN12_VE_TLB_INV_CR	_MMIO(0xcee0)
-#define GEN12_BLT_TLB_INV_CR	_MMIO(0xcee4)
-
-#define GEN12_AUX_ERR_DBG		_MMIO(0x43f4)
+#define INSTPS					_MMIO(0x2070) /* 965+ only */
+#define GEN4_INSTDONE1				_MMIO(0x207c) /* 965+ only, aka INSTDONE_2 on SNB */
+#define ACTHD_I965				_MMIO(0x2074)
+#define HWS_PGA					_MMIO(0x2080)
+#define   HWS_ADDRESS_MASK			0xfffff000
+#define   HWS_START_ADDRESS_SHIFT		4
+
+#define _3D_CHICKEN				_MMIO(0x2084)
+#define   _3D_CHICKEN_HIZ_PLANE_DISABLE_MSAA_4X_SNB	(1 << 10)
+
+#define PWRCTXA					_MMIO(0x2088) /* 965GM+ only */
+#define   PWRCTX_EN				(1 << 0)
+
+#define FF_SLICE_CHICKEN			_MMIO(0x2088)
+#define   FF_SLICE_CHICKEN_CL_PROVOKING_VERTEX_FIX	(1 << 1)
 
 /* GM45+ chicken bits -- debug workaround bits that may be required
  * for various sorts of correct behavior.  The top 16 bits of each are
  * the enables for writing to the corresponding low bit.
  */
-#define _3D_CHICKEN	_MMIO(0x2084)
-#define  _3D_CHICKEN_HIZ_PLANE_DISABLE_MSAA_4X_SNB	(1 << 10)
-#define _3D_CHICKEN2	_MMIO(0x208c)
-
-#define FF_SLICE_CHICKEN	_MMIO(0x2088)
-#define  FF_SLICE_CHICKEN_CL_PROVOKING_VERTEX_FIX	(1 << 1)
-
+#define _3D_CHICKEN2				_MMIO(0x208c)
 /* Disables pipelining of read flushes past the SF-WIZ interface.
  * Required on all Ironlake steppings according to the B-Spec, but the
  * particular danger of not doing so is not specified.
  */
-# define _3D_CHICKEN2_WM_READ_PIPELINED			(1 << 14)
-#define _3D_CHICKEN3	_MMIO(0x2090)
-#define  _3D_CHICKEN_SF_PROVOKING_VERTEX_FIX		(1 << 12)
-#define  _3D_CHICKEN_SF_DISABLE_OBJEND_CULL		(1 << 10)
-#define  _3D_CHICKEN3_AA_LINE_QUALITY_FIX_ENABLE	(1 << 5)
-#define  _3D_CHICKEN3_SF_DISABLE_FASTCLIP_CULL		(1 << 5)
-#define  _3D_CHICKEN_SDE_LIMIT_FIFO_POLY_DEPTH(x)	((x) << 1) /* gen8+ */
-#define  _3D_CHICKEN3_SF_DISABLE_PIPELINED_ATTR_FETCH	(1 << 1) /* gen6 */
-
-#define MI_MODE		_MMIO(0x209c)
-# define VS_TIMER_DISPATCH				(1 << 6)
-# define MI_FLUSH_ENABLE				(1 << 12)
-# define TGL_NESTED_BB_EN				(1 << 12)
-# define ASYNC_FLIP_PERF_DISABLE			(1 << 14)
-# define MODE_IDLE					(1 << 9)
-# define STOP_RING					(1 << 8)
-
-#define GEN6_GT_MODE	_MMIO(0x20d0)
-#define GEN7_GT_MODE	_MMIO(0x7008)
-#define   GEN6_WIZ_HASHING(hi, lo)			(((hi) << 9) | ((lo) << 7))
-#define   GEN6_WIZ_HASHING_8x8				GEN6_WIZ_HASHING(0, 0)
-#define   GEN6_WIZ_HASHING_8x4				GEN6_WIZ_HASHING(0, 1)
-#define   GEN6_WIZ_HASHING_16x4				GEN6_WIZ_HASHING(1, 0)
-#define   GEN6_WIZ_HASHING_MASK				GEN6_WIZ_HASHING(1, 1)
-#define   GEN6_TD_FOUR_ROW_DISPATCH_DISABLE		(1 << 5)
-#define   GEN9_IZ_HASHING_MASK(slice)			(0x3 << ((slice) * 2))
-#define   GEN9_IZ_HASHING(slice, val)			((val) << ((slice) * 2))
+#define   _3D_CHICKEN2_WM_READ_PIPELINED	(1 << 14)
+
+#define _3D_CHICKEN3				_MMIO(0x2090)
+#define   _3D_CHICKEN_SF_PROVOKING_VERTEX_FIX	(1 << 12)
+#define   _3D_CHICKEN_SF_DISABLE_OBJEND_CULL	(1 << 10)
+#define   _3D_CHICKEN3_AA_LINE_QUALITY_FIX_ENABLE	(1 << 5)
+#define   _3D_CHICKEN3_SF_DISABLE_FASTCLIP_CULL	(1 << 5)
+#define   _3D_CHICKEN_SDE_LIMIT_FIFO_POLY_DEPTH(x)	((x) << 1) /* gen8+ */
+#define   _3D_CHICKEN3_SF_DISABLE_PIPELINED_ATTR_FETCH	(1 << 1) /* gen6 */
+
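+/*
+ * Chicken registers of this style carry their write-enable mask in the
+ * upper 16 bits, so updates go through the masked-write helpers, e.g.
+ * (illustrative sketch only):
+ *
+ *	intel_uncore_write(uncore, _3D_CHICKEN3,
+ *			   _MASKED_BIT_ENABLE(_3D_CHICKEN_SF_DISABLE_OBJEND_CULL));
+ */
+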
+#define GEN2_INSTDONE				_MMIO(0x2090)
+#define NOPID					_MMIO(0x2094)
+#define HWSTAM					_MMIO(0x2098)
+
+#define WAIT_FOR_RC6_EXIT			_MMIO(0x20cc)
+/* HSW only */
+#define   HSW_SELECTIVE_READ_ADDRESSING_SHIFT	2
+#define   HSW_SELECTIVE_READ_ADDRESSING_MASK	(0x3 << HSW_SELECTIVE_READ_ADDRESSING_SHIFT)
+#define   HSW_SELECTIVE_WRITE_ADDRESS_SHIFT	4
+#define   HSW_SELECTIVE_WRITE_ADDRESS_MASK	(0x7 << HSW_SELECTIVE_WRITE_ADDRESS_SHIFT)
+/* HSW+ */
+#define   HSW_WAIT_FOR_RC6_EXIT_ENABLE		(1 << 0)
+#define   HSW_RCS_CONTEXT_ENABLE		(1 << 7)
+#define   HSW_RCS_INHIBIT			(1 << 8)
+/* Gen8 */
+#define   GEN8_SELECTIVE_WRITE_ADDRESS_SHIFT	4
+#define   GEN8_SELECTIVE_WRITE_ADDRESS_MASK	(0x3 << GEN8_SELECTIVE_WRITE_ADDRESS_SHIFT)
+#define   GEN8_SELECTIVE_WRITE_ADDRESSING_ENABLE	(1 << 6)
+#define   GEN8_SELECTIVE_READ_SUBSLICE_SELECT_SHIFT	9
+#define   GEN8_SELECTIVE_READ_SUBSLICE_SELECT_MASK	(0x3 << GEN8_SELECTIVE_READ_SUBSLICE_SELECT_SHIFT)
+#define   GEN8_SELECTIVE_READ_SLICE_SELECT_SHIFT	11
+#define   GEN8_SELECTIVE_READ_SLICE_SELECT_MASK	(0x3 << GEN8_SELECTIVE_READ_SLICE_SELECT_SHIFT)
+#define   GEN8_SELECTIVE_READ_ADDRESSING_ENABLE	(1 << 13)
+
+#define GEN6_GT_MODE				_MMIO(0x20d0)
+#define   GEN6_WIZ_HASHING(hi, lo)		(((hi) << 9) | ((lo) << 7))
+#define   GEN6_WIZ_HASHING_8x8			GEN6_WIZ_HASHING(0, 0)
+#define   GEN6_WIZ_HASHING_8x4			GEN6_WIZ_HASHING(0, 1)
+#define   GEN6_WIZ_HASHING_16x4			GEN6_WIZ_HASHING(1, 0)
+#define   GEN6_WIZ_HASHING_MASK			GEN6_WIZ_HASHING(1, 1)
+#define   GEN6_TD_FOUR_ROW_DISPATCH_DISABLE	(1 << 5)
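+
+/*
+ * GEN6_WIZ_HASHING is a masked field; a typical update (illustrative
+ * sketch only) uses the existing masked-field helper:
+ *
+ *	intel_uncore_write(uncore, GEN6_GT_MODE,
+ *			   _MASKED_FIELD(GEN6_WIZ_HASHING_MASK,
+ *					 GEN6_WIZ_HASHING_16x4));
+ */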
 
 /* chicken reg for WaConextSwitchWithConcurrentTLBInvalidate */
-#define GEN9_CSFE_CHICKEN1_RCS _MMIO(0x20D4)
-#define   GEN9_PREEMPT_GPGPU_SYNC_SWITCH_DISABLE (1 << 2)
-#define   GEN11_ENABLE_32_PLANE_MODE (1 << 7)
+#define GEN9_CSFE_CHICKEN1_RCS			_MMIO(0x20d4)
+#define   GEN9_PREEMPT_GPGPU_SYNC_SWITCH_DISABLE	(1 << 2)
+#define   GEN11_ENABLE_32_PLANE_MODE		(1 << 7)
 
-#define SCCGCTL94DC		_MMIO(0x94dc)
-#define   CG3DDISURB		REG_BIT(14)
+#define GEN7_FF_SLICE_CS_CHICKEN1		_MMIO(0x20e0)
+#define   GEN9_FFSC_PERCTX_PREEMPT_CTRL		(1 << 14)
 
-#define MLTICTXCTL		_MMIO(0xb170)
-#define   TDONRENDER		REG_BIT(2)
+#define FF_SLICE_CS_CHICKEN2			_MMIO(0x20e4)
+#define   GEN9_TSG_BARRIER_ACK_DISABLE		(1 << 8)
+#define   GEN9_POOLED_EU_LOAD_BALANCING_FIX_DISABLE	(1 << 10)
 
-#define L3SQCREG1_CCS0		_MMIO(0xb200)
-#define   FLUSHALLNONCOH	REG_BIT(5)
+#define GEN9_CS_DEBUG_MODE1			_MMIO(0x20ec)
+#define   FF_DOP_CLOCK_GATE_DISABLE		REG_BIT(1)
+#define GEN12_CS_DEBUG_MODE1_CCCSUNIT_BE_COMMON	_MMIO(0x20ec)
+#define   GEN12_REPLAY_MODE_GRANULARITY		REG_BIT(0)
 
 /* WaClearTdlStateAckDirtyBits */
-#define GEN8_STATE_ACK		_MMIO(0x20F0)
-#define GEN9_STATE_ACK_SLICE1	_MMIO(0x20F8)
-#define GEN9_STATE_ACK_SLICE2	_MMIO(0x2100)
-#define   GEN9_STATE_ACK_TDL0 (1 << 12)
-#define   GEN9_STATE_ACK_TDL1 (1 << 13)
-#define   GEN9_STATE_ACK_TDL2 (1 << 14)
-#define   GEN9_STATE_ACK_TDL3 (1 << 15)
-#define   GEN9_SUBSLICE_TDL_ACK_BITS \
+#define GEN8_STATE_ACK				_MMIO(0x20f0)
+#define GEN9_STATE_ACK_SLICE1			_MMIO(0x20f8)
+#define GEN9_STATE_ACK_SLICE2			_MMIO(0x2100)
+#define   GEN9_STATE_ACK_TDL0			(1 << 12)
+#define   GEN9_STATE_ACK_TDL1			(1 << 13)
+#define   GEN9_STATE_ACK_TDL2			(1 << 14)
+#define   GEN9_STATE_ACK_TDL3			(1 << 15)
+#define   GEN9_SUBSLICE_TDL_ACK_BITS	\
 	(GEN9_STATE_ACK_TDL3 | GEN9_STATE_ACK_TDL2 | \
 	 GEN9_STATE_ACK_TDL1 | GEN9_STATE_ACK_TDL0)
 
-#define GFX_MODE	_MMIO(0x2520)
-
-#define CACHE_MODE_0	_MMIO(0x2120) /* 915+ only */
-#define   CM0_PIPELINED_RENDER_FLUSH_DISABLE (1 << 8)
-#define   CM0_IZ_OPT_DISABLE      (1 << 6)
-#define   CM0_ZR_OPT_DISABLE      (1 << 5)
-#define	  CM0_STC_EVICT_DISABLE_LRA_SNB	(1 << 5)
-#define   CM0_DEPTH_EVICT_DISABLE (1 << 4)
-#define   CM0_COLOR_EVICT_DISABLE (1 << 3)
-#define   CM0_DEPTH_WRITE_DISABLE (1 << 1)
-#define   CM0_RC_OP_FLUSH_DISABLE (1 << 0)
-#define GFX_FLSH_CNTL	_MMIO(0x2170) /* 915+ only */
-#define GFX_FLSH_CNTL_GEN6	_MMIO(0x101008)
-#define   GFX_FLSH_CNTL_EN	(1 << 0)
-
-#define CACHE_MODE_0_GEN7	_MMIO(0x7000) /* IVB+ */
-#define RC_OP_FLUSH_ENABLE (1 << 0)
-#define   HIZ_RAW_STALL_OPT_DISABLE (1 << 2)
-#define CACHE_MODE_1		_MMIO(0x7004) /* IVB+ */
-#define   PIXEL_SUBSPAN_COLLECT_OPT_DISABLE	(1 << 6)
-#define   GEN8_4x4_STC_OPTIMIZATION_DISABLE	(1 << 6)
-#define   GEN9_PARTIAL_RESOLVE_IN_VC_DISABLE	(1 << 1)
+#define CACHE_MODE_0				_MMIO(0x2120) /* 915+ only */
+#define   CM0_PIPELINED_RENDER_FLUSH_DISABLE	(1 << 8)
+#define   CM0_IZ_OPT_DISABLE			(1 << 6)
+#define   CM0_ZR_OPT_DISABLE			(1 << 5)
+#define   CM0_STC_EVICT_DISABLE_LRA_SNB		(1 << 5)
+#define   CM0_DEPTH_EVICT_DISABLE		(1 << 4)
+#define   CM0_COLOR_EVICT_DISABLE		(1 << 3)
+#define   CM0_DEPTH_WRITE_DISABLE		(1 << 1)
+#define   CM0_RC_OP_FLUSH_DISABLE		(1 << 0)
 
-#define GEN6_RCS_PWR_FSM _MMIO(0x22ac)
-#define GEN9_RCS_FE_FSM2 _MMIO(0x22a4)
-
-#define GEN10_CACHE_MODE_SS			_MMIO(0xe420)
-#define   ENABLE_PREFETCH_INTO_IC		REG_BIT(3)
-#define   FLOAT_BLEND_OPTIMIZATION_ENABLE	REG_BIT(4)
-
-/* Fuse readout registers for GT */
-#define HSW_PAVP_FUSE1			_MMIO(0x911C)
-#define   XEHP_SFC_ENABLE_MASK		REG_GENMASK(27, 24)
-#define   HSW_F1_EU_DIS_MASK		REG_GENMASK(17, 16)
-#define   HSW_F1_EU_DIS_10EUS		0
-#define   HSW_F1_EU_DIS_8EUS		1
-#define   HSW_F1_EU_DIS_6EUS		2
-
-#define CHV_FUSE_GT			_MMIO(VLV_DISPLAY_BASE + 0x2168)
-#define   CHV_FGT_DISABLE_SS0		(1 << 10)
-#define   CHV_FGT_DISABLE_SS1		(1 << 11)
-#define   CHV_FGT_EU_DIS_SS0_R0_SHIFT	16
-#define   CHV_FGT_EU_DIS_SS0_R0_MASK	(0xf << CHV_FGT_EU_DIS_SS0_R0_SHIFT)
-#define   CHV_FGT_EU_DIS_SS0_R1_SHIFT	20
-#define   CHV_FGT_EU_DIS_SS0_R1_MASK	(0xf << CHV_FGT_EU_DIS_SS0_R1_SHIFT)
-#define   CHV_FGT_EU_DIS_SS1_R0_SHIFT	24
-#define   CHV_FGT_EU_DIS_SS1_R0_MASK	(0xf << CHV_FGT_EU_DIS_SS1_R0_SHIFT)
-#define   CHV_FGT_EU_DIS_SS1_R1_SHIFT	28
-#define   CHV_FGT_EU_DIS_SS1_R1_MASK	(0xf << CHV_FGT_EU_DIS_SS1_R1_SHIFT)
-
-#define GEN8_FUSE2			_MMIO(0x9120)
-#define   GEN8_F2_SS_DIS_SHIFT		21
-#define   GEN8_F2_SS_DIS_MASK		(0x7 << GEN8_F2_SS_DIS_SHIFT)
-#define   GEN8_F2_S_ENA_SHIFT		25
-#define   GEN8_F2_S_ENA_MASK		(0x7 << GEN8_F2_S_ENA_SHIFT)
-
-#define   GEN9_F2_SS_DIS_SHIFT		20
-#define   GEN9_F2_SS_DIS_MASK		(0xf << GEN9_F2_SS_DIS_SHIFT)
-
-#define   GEN10_F2_S_ENA_SHIFT		22
-#define   GEN10_F2_S_ENA_MASK		(0x3f << GEN10_F2_S_ENA_SHIFT)
-#define   GEN10_F2_SS_DIS_SHIFT		18
-#define   GEN10_F2_SS_DIS_MASK		(0xf << GEN10_F2_SS_DIS_SHIFT)
-
-#define	GEN10_MIRROR_FUSE3		_MMIO(0x9118)
-#define GEN10_L3BANK_PAIR_COUNT     4
-#define GEN10_L3BANK_MASK   0x0F
-/* on Xe_HP the same fuses indicates mslices instead of L3 banks */
-#define GEN12_MAX_MSLICES 4
-#define GEN12_MEML3_EN_MASK 0x0F
-
-#define GEN8_EU_DISABLE0		_MMIO(0x9134)
-#define   GEN8_EU_DIS0_S0_MASK		0xffffff
-#define   GEN8_EU_DIS0_S1_SHIFT		24
-#define   GEN8_EU_DIS0_S1_MASK		(0xff << GEN8_EU_DIS0_S1_SHIFT)
-
-#define GEN8_EU_DISABLE1		_MMIO(0x9138)
-#define   GEN8_EU_DIS1_S1_MASK		0xffff
-#define   GEN8_EU_DIS1_S2_SHIFT		16
-#define   GEN8_EU_DIS1_S2_MASK		(0xffff << GEN8_EU_DIS1_S2_SHIFT)
-
-#define GEN8_EU_DISABLE2		_MMIO(0x913c)
-#define   GEN8_EU_DIS2_S2_MASK		0xff
-
-#define GEN9_EU_DISABLE(slice)		_MMIO(0x9134 + (slice) * 0x4)
-
-#define GEN10_EU_DISABLE3		_MMIO(0x9140)
-#define   GEN10_EU_DIS_SS_MASK		0xff
-
-#define GEN11_GT_VEBOX_VDBOX_DISABLE	_MMIO(0x9140)
-#define   GEN11_GT_VDBOX_DISABLE_MASK	0xff
-#define   GEN11_GT_VEBOX_DISABLE_SHIFT	16
-#define   GEN11_GT_VEBOX_DISABLE_MASK	(0x0f << GEN11_GT_VEBOX_DISABLE_SHIFT)
-
-#define GEN11_EU_DISABLE _MMIO(0x9134)
-#define GEN11_EU_DIS_MASK 0xFF
-
-#define GEN11_GT_SLICE_ENABLE _MMIO(0x9138)
-#define GEN11_GT_S_ENA_MASK 0xFF
-
-#define GEN11_GT_SUBSLICE_DISABLE _MMIO(0x913C)
-
-#define GEN12_GT_GEOMETRY_DSS_ENABLE _MMIO(0x913C)
-#define GEN12_GT_COMPUTE_DSS_ENABLE _MMIO(0x9144)
-
-#define XEHP_EU_ENABLE			_MMIO(0x9134)
-#define XEHP_EU_ENA_MASK		0xFF
-
-#define CRSTANDVID		_MMIO(0x11100)
-#define PXVFREQ(fstart)		_MMIO(0x11110 + (fstart) * 4)  /* P[0-15]VIDFREQ (0x1114c) (Ironlake) */
-#define   PXVFREQ_PX_MASK	0x7f000000
-#define   PXVFREQ_PX_SHIFT	24
-#define VIDFREQ_BASE		_MMIO(0x11110)
-#define VIDFREQ1		_MMIO(0x11110) /* VIDFREQ1-4 (0x1111c) (Cantiga) */
-#define VIDFREQ2		_MMIO(0x11114)
-#define VIDFREQ3		_MMIO(0x11118)
-#define VIDFREQ4		_MMIO(0x1111c)
-#define   VIDFREQ_P0_MASK	0x1f000000
-#define   VIDFREQ_P0_SHIFT	24
-#define   VIDFREQ_P0_CSCLK_MASK	0x00f00000
-#define   VIDFREQ_P0_CSCLK_SHIFT 20
-#define   VIDFREQ_P0_CRCLK_MASK	0x000f0000
-#define   VIDFREQ_P0_CRCLK_SHIFT 16
-#define   VIDFREQ_P1_MASK	0x00001f00
-#define   VIDFREQ_P1_SHIFT	8
-#define   VIDFREQ_P1_CSCLK_MASK	0x000000f0
-#define   VIDFREQ_P1_CSCLK_SHIFT 4
-#define   VIDFREQ_P1_CRCLK_MASK	0x0000000f
-#define INTTOEXT_BASE_ILK	_MMIO(0x11300)
-#define INTTOEXT_BASE		_MMIO(0x11120) /* INTTOEXT1-8 (0x1113c) */
-#define   INTTOEXT_MAP3_SHIFT	24
-#define   INTTOEXT_MAP3_MASK	(0x1f << INTTOEXT_MAP3_SHIFT)
-#define   INTTOEXT_MAP2_SHIFT	16
-#define   INTTOEXT_MAP2_MASK	(0x1f << INTTOEXT_MAP2_SHIFT)
-#define   INTTOEXT_MAP1_SHIFT	8
-#define   INTTOEXT_MAP1_MASK	(0x1f << INTTOEXT_MAP1_SHIFT)
-#define   INTTOEXT_MAP0_SHIFT	0
-#define   INTTOEXT_MAP0_MASK	(0x1f << INTTOEXT_MAP0_SHIFT)
-#define MEMSWCTL		_MMIO(0x11170) /* Ironlake only */
-#define   MEMCTL_CMD_MASK	0xe000
-#define   MEMCTL_CMD_SHIFT	13
-#define   MEMCTL_CMD_RCLK_OFF	0
-#define   MEMCTL_CMD_RCLK_ON	1
-#define   MEMCTL_CMD_CHFREQ	2
-#define   MEMCTL_CMD_CHVID	3
-#define   MEMCTL_CMD_VMMOFF	4
-#define   MEMCTL_CMD_VMMON	5
-#define   MEMCTL_CMD_STS	(1 << 12) /* write 1 triggers command, clears
-					     when command complete */
-#define   MEMCTL_FREQ_MASK	0x0f00 /* jitter, from 0-15 */
-#define   MEMCTL_FREQ_SHIFT	8
-#define   MEMCTL_SFCAVM		(1 << 7)
-#define   MEMCTL_TGT_VID_MASK	0x007f
-#define MEMIHYST		_MMIO(0x1117c)
-#define MEMINTREN		_MMIO(0x11180) /* 16 bits */
-#define   MEMINT_RSEXIT_EN	(1 << 8)
-#define   MEMINT_CX_SUPR_EN	(1 << 7)
-#define   MEMINT_CONT_BUSY_EN	(1 << 6)
-#define   MEMINT_AVG_BUSY_EN	(1 << 5)
-#define   MEMINT_EVAL_CHG_EN	(1 << 4)
-#define   MEMINT_MON_IDLE_EN	(1 << 3)
-#define   MEMINT_UP_EVAL_EN	(1 << 2)
-#define   MEMINT_DOWN_EVAL_EN	(1 << 1)
-#define   MEMINT_SW_CMD_EN	(1 << 0)
-#define MEMINTRSTR		_MMIO(0x11182) /* 16 bits */
-#define   MEM_RSEXIT_MASK	0xc000
-#define   MEM_RSEXIT_SHIFT	14
-#define   MEM_CONT_BUSY_MASK	0x3000
-#define   MEM_CONT_BUSY_SHIFT	12
-#define   MEM_AVG_BUSY_MASK	0x0c00
-#define   MEM_AVG_BUSY_SHIFT	10
-#define   MEM_EVAL_CHG_MASK	0x0300
-#define   MEM_EVAL_BUSY_SHIFT	8
-#define   MEM_MON_IDLE_MASK	0x00c0
-#define   MEM_MON_IDLE_SHIFT	6
-#define   MEM_UP_EVAL_MASK	0x0030
-#define   MEM_UP_EVAL_SHIFT	4
-#define   MEM_DOWN_EVAL_MASK	0x000c
-#define   MEM_DOWN_EVAL_SHIFT	2
-#define   MEM_SW_CMD_MASK	0x0003
-#define   MEM_INT_STEER_GFX	0
-#define   MEM_INT_STEER_CMR	1
-#define   MEM_INT_STEER_SMI	2
-#define   MEM_INT_STEER_SCI	3
-#define MEMINTRSTS		_MMIO(0x11184)
-#define   MEMINT_RSEXIT		(1 << 7)
-#define   MEMINT_CONT_BUSY	(1 << 6)
-#define   MEMINT_AVG_BUSY	(1 << 5)
-#define   MEMINT_EVAL_CHG	(1 << 4)
-#define   MEMINT_MON_IDLE	(1 << 3)
-#define   MEMINT_UP_EVAL	(1 << 2)
-#define   MEMINT_DOWN_EVAL	(1 << 1)
-#define   MEMINT_SW_CMD		(1 << 0)
-#define MEMMODECTL		_MMIO(0x11190)
-#define   MEMMODE_BOOST_EN	(1 << 31)
-#define   MEMMODE_BOOST_FREQ_MASK 0x0f000000 /* jitter for boost, 0-15 */
-#define   MEMMODE_BOOST_FREQ_SHIFT 24
-#define   MEMMODE_IDLE_MODE_MASK 0x00030000
-#define   MEMMODE_IDLE_MODE_SHIFT 16
-#define   MEMMODE_IDLE_MODE_EVAL 0
-#define   MEMMODE_IDLE_MODE_CONT 1
-#define   MEMMODE_HWIDLE_EN	(1 << 15)
-#define   MEMMODE_SWMODE_EN	(1 << 14)
-#define   MEMMODE_RCLK_GATE	(1 << 13)
-#define   MEMMODE_HW_UPDATE	(1 << 12)
-#define   MEMMODE_FSTART_MASK	0x00000f00 /* starting jitter, 0-15 */
-#define   MEMMODE_FSTART_SHIFT	8
-#define   MEMMODE_FMAX_MASK	0x000000f0 /* max jitter, 0-15 */
-#define   MEMMODE_FMAX_SHIFT	4
-#define   MEMMODE_FMIN_MASK	0x0000000f /* min jitter, 0-15 */
-#define RCBMAXAVG		_MMIO(0x1119c)
-#define MEMSWCTL2		_MMIO(0x1119e) /* Cantiga only */
-#define   SWMEMCMD_RENDER_OFF	(0 << 13)
-#define   SWMEMCMD_RENDER_ON	(1 << 13)
-#define   SWMEMCMD_SWFREQ	(2 << 13)
-#define   SWMEMCMD_TARVID	(3 << 13)
-#define   SWMEMCMD_VRM_OFF	(4 << 13)
-#define   SWMEMCMD_VRM_ON	(5 << 13)
-#define   CMDSTS		(1 << 12)
-#define   SFCAVM		(1 << 11)
-#define   SWFREQ_MASK		0x0380 /* P0-7 */
-#define   SWFREQ_SHIFT		7
-#define   TARVID_MASK		0x001f
-#define MEMSTAT_CTG		_MMIO(0x111a0)
-#define RCBMINAVG		_MMIO(0x111a0)
-#define RCUPEI			_MMIO(0x111b0)
-#define RCDNEI			_MMIO(0x111b4)
-#define RSTDBYCTL		_MMIO(0x111b8)
-#define   RS1EN			(1 << 31)
-#define   RS2EN			(1 << 30)
-#define   RS3EN			(1 << 29)
-#define   D3RS3EN		(1 << 28) /* Display D3 imlies RS3 */
-#define   SWPROMORSX		(1 << 27) /* RSx promotion timers ignored */
-#define   RCWAKERW		(1 << 26) /* Resetwarn from PCH causes wakeup */
-#define   DPRSLPVREN		(1 << 25) /* Fast voltage ramp enable */
-#define   GFXTGHYST		(1 << 24) /* Hysteresis to allow trunk gating */
-#define   RCX_SW_EXIT		(1 << 23) /* Leave RSx and prevent re-entry */
-#define   RSX_STATUS_MASK	(7 << 20)
-#define   RSX_STATUS_ON		(0 << 20)
-#define   RSX_STATUS_RC1	(1 << 20)
-#define   RSX_STATUS_RC1E	(2 << 20)
-#define   RSX_STATUS_RS1	(3 << 20)
-#define   RSX_STATUS_RS2	(4 << 20) /* aka rc6 */
-#define   RSX_STATUS_RSVD	(5 << 20) /* deep rc6 unsupported on ilk */
-#define   RSX_STATUS_RS3	(6 << 20) /* rs3 unsupported on ilk */
-#define   RSX_STATUS_RSVD2	(7 << 20)
-#define   UWRCRSXE		(1 << 19) /* wake counter limit prevents rsx */
-#define   RSCRP			(1 << 18) /* rs requests control on rs1/2 reqs */
-#define   JRSC			(1 << 17) /* rsx coupled to cpu c-state */
-#define   RS2INC0		(1 << 16) /* allow rs2 in cpu c0 */
-#define   RS1CONTSAV_MASK	(3 << 14)
-#define   RS1CONTSAV_NO_RS1	(0 << 14) /* rs1 doesn't save/restore context */
-#define   RS1CONTSAV_RSVD	(1 << 14)
-#define   RS1CONTSAV_SAVE_RS1	(2 << 14) /* rs1 saves context */
-#define   RS1CONTSAV_FULL_RS1	(3 << 14) /* rs1 saves and restores context */
-#define   NORMSLEXLAT_MASK	(3 << 12)
-#define   SLOW_RS123		(0 << 12)
-#define   SLOW_RS23		(1 << 12)
-#define   SLOW_RS3		(2 << 12)
-#define   NORMAL_RS123		(3 << 12)
-#define   RCMODE_TIMEOUT	(1 << 11) /* 0 is eval interval method */
-#define   IMPROMOEN		(1 << 10) /* promo is immediate or delayed until next idle interval (only for timeout method above) */
-#define   RCENTSYNC		(1 << 9) /* rs coupled to cpu c-state (3/6/7) */
-#define   STATELOCK		(1 << 7) /* locked to rs_cstate if 0 */
-#define   RS_CSTATE_MASK	(3 << 4)
-#define   RS_CSTATE_C367_RS1	(0 << 4)
-#define   RS_CSTATE_C36_RS1_C7_RS2 (1 << 4)
-#define   RS_CSTATE_RSVD	(2 << 4)
-#define   RS_CSTATE_C367_RS2	(3 << 4)
-#define   REDSAVES		(1 << 3) /* no context save if was idle during rs0 */
-#define   REDRESTORES		(1 << 2) /* no restore if was idle during rs0 */
-#define VIDCTL			_MMIO(0x111c0)
-#define VIDSTS			_MMIO(0x111c8)
-#define VIDSTART		_MMIO(0x111cc) /* 8 bits */
-#define MEMSTAT_ILK		_MMIO(0x111f8)
-#define   MEMSTAT_VID_MASK	0x7f00
-#define   MEMSTAT_VID_SHIFT	8
-#define   MEMSTAT_PSTATE_MASK	0x00f8
-#define   MEMSTAT_PSTATE_SHIFT  3
-#define   MEMSTAT_MON_ACTV	(1 << 2)
-#define   MEMSTAT_SRC_CTL_MASK	0x0003
-#define   MEMSTAT_SRC_CTL_CORE	0
-#define   MEMSTAT_SRC_CTL_TRB	1
-#define   MEMSTAT_SRC_CTL_THM	2
-#define   MEMSTAT_SRC_CTL_STDBY 3
-#define RCPREVBSYTUPAVG		_MMIO(0x113b8)
-#define RCPREVBSYTDNAVG		_MMIO(0x113bc)
-#define PMMISC			_MMIO(0x11214)
-#define   MCPPCE_EN		(1 << 0) /* enable PM_MSG from PCH->MPC */
-#define SDEW			_MMIO(0x1124c)
-#define CSIEW0			_MMIO(0x11250)
-#define CSIEW1			_MMIO(0x11254)
-#define CSIEW2			_MMIO(0x11258)
-#define PEW(i)			_MMIO(0x1125c + (i) * 4) /* 5 registers */
-#define DEW(i)			_MMIO(0x11270 + (i) * 4) /* 3 registers */
-#define MCHAFE			_MMIO(0x112c0)
-#define CSIEC			_MMIO(0x112e0)
-#define DMIEC			_MMIO(0x112e4)
-#define DDREC			_MMIO(0x112e8)
-#define PEG0EC			_MMIO(0x112ec)
-#define PEG1EC			_MMIO(0x112f0)
-#define GFXEC			_MMIO(0x112f4)
-#define RPPREVBSYTUPAVG		_MMIO(0x113b8)
-#define RPPREVBSYTDNAVG		_MMIO(0x113bc)
-#define ECR			_MMIO(0x11600)
-#define   ECR_GPFE		(1 << 31)
-#define   ECR_IMONE		(1 << 30)
-#define   ECR_CAP_MASK		0x0000001f /* Event range, 0-31 */
-#define OGW0			_MMIO(0x11608)
-#define OGW1			_MMIO(0x1160c)
-#define EG0			_MMIO(0x11610)
-#define EG1			_MMIO(0x11614)
-#define EG2			_MMIO(0x11618)
-#define EG3			_MMIO(0x1161c)
-#define EG4			_MMIO(0x11620)
-#define EG5			_MMIO(0x11624)
-#define EG6			_MMIO(0x11628)
-#define EG7			_MMIO(0x1162c)
-#define PXW(i)			_MMIO(0x11664 + (i) * 4) /* 4 registers */
-#define PXWL(i)			_MMIO(0x11680 + (i) * 8) /* 8 registers */
-#define LCFUSE02		_MMIO(0x116c0)
-#define   LCFUSE_HIV_MASK	0x000000ff
-
-#define GEN6_GT_THREAD_STATUS_REG _MMIO(0x13805c)
-#define GEN6_GT_THREAD_STATUS_CORE_MASK 0x7
+#define GFX_FLSH_CNTL				_MMIO(0x2170) /* 915+ only */
 
 /*
  * Logical Context regs
@@ -798,427 +185,541 @@
  * - GT1 size just indicates how much of render context
  *   doesn't need saving on GT1
  */
-#define CXT_SIZE		_MMIO(0x21a0)
-#define GEN6_CXT_POWER_SIZE(cxt_reg)	(((cxt_reg) >> 24) & 0x3f)
-#define GEN6_CXT_RING_SIZE(cxt_reg)	(((cxt_reg) >> 18) & 0x3f)
-#define GEN6_CXT_RENDER_SIZE(cxt_reg)	(((cxt_reg) >> 12) & 0x3f)
-#define GEN6_CXT_EXTENDED_SIZE(cxt_reg)	(((cxt_reg) >> 6) & 0x3f)
-#define GEN6_CXT_PIPELINE_SIZE(cxt_reg)	(((cxt_reg) >> 0) & 0x3f)
-#define GEN6_CXT_TOTAL_SIZE(cxt_reg)	(GEN6_CXT_RING_SIZE(cxt_reg) + \
-					GEN6_CXT_EXTENDED_SIZE(cxt_reg) + \
-					GEN6_CXT_PIPELINE_SIZE(cxt_reg))
-#define GEN7_CXT_SIZE		_MMIO(0x21a8)
-#define GEN7_CXT_POWER_SIZE(ctx_reg)	(((ctx_reg) >> 25) & 0x7f)
-#define GEN7_CXT_RING_SIZE(ctx_reg)	(((ctx_reg) >> 22) & 0x7)
-#define GEN7_CXT_RENDER_SIZE(ctx_reg)	(((ctx_reg) >> 16) & 0x3f)
-#define GEN7_CXT_EXTENDED_SIZE(ctx_reg)	(((ctx_reg) >> 9) & 0x7f)
-#define GEN7_CXT_GT1_SIZE(ctx_reg)	(((ctx_reg) >> 6) & 0x7)
-#define GEN7_CXT_VFSTATE_SIZE(ctx_reg)	(((ctx_reg) >> 0) & 0x3f)
-#define GEN7_CXT_TOTAL_SIZE(ctx_reg)	(GEN7_CXT_EXTENDED_SIZE(ctx_reg) + \
-					 GEN7_CXT_VFSTATE_SIZE(ctx_reg))
-
-enum {
-	INTEL_ADVANCED_CONTEXT = 0,
-	INTEL_LEGACY_32B_CONTEXT,
-	INTEL_ADVANCED_AD_CONTEXT,
-	INTEL_LEGACY_64B_CONTEXT
-};
-
-enum {
-	FAULT_AND_HANG = 0,
-	FAULT_AND_HALT, /* Debug only */
-	FAULT_AND_STREAM,
-	FAULT_AND_CONTINUE /* Unsupported */
-};
-
-#define CTX_GTT_ADDRESS_MASK GENMASK(31, 12)
-#define GEN8_CTX_VALID (1 << 0)
-#define GEN8_CTX_FORCE_PD_RESTORE (1 << 1)
-#define GEN8_CTX_FORCE_RESTORE (1 << 2)
-#define GEN8_CTX_L3LLC_COHERENT (1 << 5)
-#define GEN8_CTX_PRIVILEGE (1 << 8)
-#define GEN8_CTX_ADDRESSING_MODE_SHIFT 3
-
-#define GEN8_CTX_ID_SHIFT 32
-#define GEN8_CTX_ID_WIDTH 21
-#define GEN11_SW_CTX_ID_SHIFT 37
-#define GEN11_SW_CTX_ID_WIDTH 11
-#define GEN11_ENGINE_CLASS_SHIFT 61
-#define GEN11_ENGINE_CLASS_WIDTH 3
-#define GEN11_ENGINE_INSTANCE_SHIFT 48
-#define GEN11_ENGINE_INSTANCE_WIDTH 6
-
-#define XEHP_SW_CTX_ID_SHIFT 39
-#define XEHP_SW_CTX_ID_WIDTH 16
-#define XEHP_SW_COUNTER_SHIFT 58
-#define XEHP_SW_COUNTER_WIDTH 6
-
-#define UNSLCGCTL9440			_MMIO(0x9440)
-#define   GAMTLBOACS_CLKGATE_DIS	REG_BIT(28)
-#define   GAMTLBVDBOX5_CLKGATE_DIS	REG_BIT(27)
-#define   GAMTLBVDBOX6_CLKGATE_DIS	REG_BIT(26)
-#define   GAMTLBVDBOX3_CLKGATE_DIS	REG_BIT(24)
-#define   GAMTLBVDBOX4_CLKGATE_DIS	REG_BIT(23)
-#define   GAMTLBVDBOX7_CLKGATE_DIS	REG_BIT(22)
-#define   GAMTLBVDBOX2_CLKGATE_DIS	REG_BIT(21)
-#define   GAMTLBVDBOX0_CLKGATE_DIS	REG_BIT(17)
-#define   GAMTLBKCR_CLKGATE_DIS		REG_BIT(16)
-#define   GAMTLBGUC_CLKGATE_DIS		REG_BIT(15)
-#define   GAMTLBBLT_CLKGATE_DIS		REG_BIT(14)
-#define   GAMTLBVDBOX1_CLKGATE_DIS	REG_BIT(6)
-
-#define UNSLCGCTL9444			_MMIO(0x9444)
-#define   GAMTLBGFXA0_CLKGATE_DIS	REG_BIT(30)
-#define   GAMTLBGFXA1_CLKGATE_DIS	REG_BIT(29)
-#define   GAMTLBCOMPA0_CLKGATE_DIS	REG_BIT(28)
-#define   GAMTLBCOMPA1_CLKGATE_DIS	REG_BIT(27)
-#define   GAMTLBCOMPB0_CLKGATE_DIS	REG_BIT(26)
-#define   GAMTLBCOMPB1_CLKGATE_DIS	REG_BIT(25)
-#define   GAMTLBCOMPC0_CLKGATE_DIS	REG_BIT(24)
-#define   GAMTLBCOMPC1_CLKGATE_DIS	REG_BIT(23)
-#define   GAMTLBCOMPD0_CLKGATE_DIS	REG_BIT(22)
-#define   GAMTLBCOMPD1_CLKGATE_DIS	REG_BIT(21)
-#define   GAMTLBMERT_CLKGATE_DIS	REG_BIT(20)
-#define   GAMTLBVEBOX3_CLKGATE_DIS	REG_BIT(19)
-#define   GAMTLBVEBOX2_CLKGATE_DIS	REG_BIT(18)
-#define   GAMTLBVEBOX1_CLKGATE_DIS	REG_BIT(17)
-#define   GAMTLBVEBOX0_CLKGATE_DIS	REG_BIT(16)
-#define   LTCDD_CLKGATE_DIS		REG_BIT(10)
-
-#define SLICE_UNIT_LEVEL_CLKGATE	_MMIO(0x94d4)
-#define  SARBUNIT_CLKGATE_DIS		(1 << 5)
-#define  RCCUNIT_CLKGATE_DIS		(1 << 7)
-#define  MSCUNIT_CLKGATE_DIS		(1 << 10)
-#define  NODEDSS_CLKGATE_DIS		REG_BIT(12)
-#define  L3_CLKGATE_DIS			REG_BIT(16)
-#define  L3_CR2X_CLKGATE_DIS		REG_BIT(17)
-
-#define SUBSLICE_UNIT_LEVEL_CLKGATE	_MMIO(0x9524)
-#define   DSS_ROUTER_CLKGATE_DIS	REG_BIT(28)
-#define   GWUNIT_CLKGATE_DIS		REG_BIT(16)
-
-#define SUBSLICE_UNIT_LEVEL_CLKGATE2	_MMIO(0x9528)
-#define  CPSSUNIT_CLKGATE_DIS		REG_BIT(9)
-
-#define SSMCGCTL9530			_MMIO(0x9530)
-#define   RTFUNIT_CLKGATE_DIS		REG_BIT(18)
-
-#define UNSLICE_UNIT_LEVEL_CLKGATE	_MMIO(0x9434)
-#define   VFUNIT_CLKGATE_DIS		REG_BIT(20)
-#define   TSGUNIT_CLKGATE_DIS		REG_BIT(17) /* XEHPSDV */
-#define   CG3DDISCFEG_CLKGATE_DIS	REG_BIT(17) /* DG2 */
-#define   GAMEDIA_CLKGATE_DIS		REG_BIT(11)
-#define   HSUNIT_CLKGATE_DIS		REG_BIT(8)
-#define   VSUNIT_CLKGATE_DIS		REG_BIT(3)
-
-#define UNSLICE_UNIT_LEVEL_CLKGATE2	_MMIO(0x94e4)
-#define   VSUNIT_CLKGATE_DIS_TGL	REG_BIT(19)
-#define   PSDUNIT_CLKGATE_DIS		REG_BIT(5)
-
-#define INF_UNIT_LEVEL_CLKGATE		_MMIO(0x9560)
-#define   CGPSF_CLKGATE_DIS		(1 << 3)
-
-#define GEN11_GT_INTR_DW0		_MMIO(0x190018)
-#define  GEN11_CSME			(31)
-#define  GEN11_GUNIT			(28)
-#define  GEN11_GUC			(25)
-#define  GEN11_WDPERF			(20)
-#define  GEN11_KCR			(19)
-#define  GEN11_GTPM			(16)
-#define  GEN11_BCS			(15)
-#define  GEN11_RCS0			(0)
-
-#define GEN11_GT_INTR_DW1		_MMIO(0x19001c)
-#define  GEN11_VECS(x)			(31 - (x))
-#define  GEN11_VCS(x)			(x)
-
-#define GEN11_GT_INTR_DW(x)		_MMIO(0x190018 + ((x) * 4))
-
-#define GEN11_INTR_IDENTITY_REG0	_MMIO(0x190060)
-#define GEN11_INTR_IDENTITY_REG1	_MMIO(0x190064)
-#define  GEN11_INTR_DATA_VALID		(1 << 31)
-#define  GEN11_INTR_ENGINE_CLASS(x)	(((x) & GENMASK(18, 16)) >> 16)
-#define  GEN11_INTR_ENGINE_INSTANCE(x)	(((x) & GENMASK(25, 20)) >> 20)
-#define  GEN11_INTR_ENGINE_INTR(x)	((x) & 0xffff)
-/* irq instances for OTHER_CLASS */
-#define OTHER_GUC_INSTANCE	0
-#define OTHER_GTPM_INSTANCE	1
-#define OTHER_KCR_INSTANCE	4
-
-#define GEN11_INTR_IDENTITY_REG(x)	_MMIO(0x190060 + ((x) * 4))
-
-#define GEN11_IIR_REG0_SELECTOR		_MMIO(0x190070)
-#define GEN11_IIR_REG1_SELECTOR		_MMIO(0x190074)
-
-#define GEN11_IIR_REG_SELECTOR(x)	_MMIO(0x190070 + ((x) * 4))
-
-#define GEN11_RENDER_COPY_INTR_ENABLE	_MMIO(0x190030)
-#define GEN11_VCS_VECS_INTR_ENABLE	_MMIO(0x190034)
-#define GEN11_GUC_SG_INTR_ENABLE	_MMIO(0x190038)
-#define GEN11_GPM_WGBOXPERF_INTR_ENABLE	_MMIO(0x19003c)
-#define GEN11_CRYPTO_RSVD_INTR_ENABLE	_MMIO(0x190040)
-#define GEN11_GUNIT_CSME_INTR_ENABLE	_MMIO(0x190044)
-
-#define GEN11_RCS0_RSVD_INTR_MASK	_MMIO(0x190090)
-#define GEN11_BCS_RSVD_INTR_MASK	_MMIO(0x1900a0)
-#define GEN11_VCS0_VCS1_INTR_MASK	_MMIO(0x1900a8)
-#define GEN11_VCS2_VCS3_INTR_MASK	_MMIO(0x1900ac)
-#define GEN12_VCS4_VCS5_INTR_MASK	_MMIO(0x1900b0)
-#define GEN12_VCS6_VCS7_INTR_MASK	_MMIO(0x1900b4)
-#define GEN11_VECS0_VECS1_INTR_MASK	_MMIO(0x1900d0)
-#define GEN12_VECS2_VECS3_INTR_MASK	_MMIO(0x1900d4)
-#define GEN11_GUC_SG_INTR_MASK		_MMIO(0x1900e8)
-#define GEN11_GPM_WGBOXPERF_INTR_MASK	_MMIO(0x1900ec)
-#define GEN11_CRYPTO_RSVD_INTR_MASK	_MMIO(0x1900f0)
-#define GEN11_GUNIT_CSME_INTR_MASK	_MMIO(0x1900f4)
-
-#define   ENGINE1_MASK			REG_GENMASK(31, 16)
-#define   ENGINE0_MASK			REG_GENMASK(15, 0)
-
-#define GEN7_FF_SLICE_CS_CHICKEN1	_MMIO(0x20e0)
-#define   GEN9_FFSC_PERCTX_PREEMPT_CTRL	(1 << 14)
-
-#define FF_SLICE_CS_CHICKEN2			_MMIO(0x20e4)
-#define  GEN9_TSG_BARRIER_ACK_DISABLE		(1 << 8)
-#define  GEN9_POOLED_EU_LOAD_BALANCING_FIX_DISABLE  (1 << 10)
+#define CXT_SIZE				_MMIO(0x21a0)
+#define   GEN6_CXT_POWER_SIZE(cxt_reg)		(((cxt_reg) >> 24) & 0x3f)
+#define   GEN6_CXT_RING_SIZE(cxt_reg)		(((cxt_reg) >> 18) & 0x3f)
+#define   GEN6_CXT_RENDER_SIZE(cxt_reg)		(((cxt_reg) >> 12) & 0x3f)
+#define   GEN6_CXT_EXTENDED_SIZE(cxt_reg)	(((cxt_reg) >> 6) & 0x3f)
+#define   GEN6_CXT_PIPELINE_SIZE(cxt_reg)	(((cxt_reg) >> 0) & 0x3f)
+#define   GEN6_CXT_TOTAL_SIZE(cxt_reg)		(GEN6_CXT_RING_SIZE(cxt_reg) + \
+						GEN6_CXT_EXTENDED_SIZE(cxt_reg) + \
+						GEN6_CXT_PIPELINE_SIZE(cxt_reg))
+#define GEN7_CXT_SIZE				_MMIO(0x21a8)
+#define   GEN7_CXT_POWER_SIZE(ctx_reg)		(((ctx_reg) >> 25) & 0x7f)
+#define   GEN7_CXT_RING_SIZE(ctx_reg)		(((ctx_reg) >> 22) & 0x7)
+#define   GEN7_CXT_RENDER_SIZE(ctx_reg)		(((ctx_reg) >> 16) & 0x3f)
+#define   GEN7_CXT_EXTENDED_SIZE(ctx_reg)	(((ctx_reg) >> 9) & 0x7f)
+#define   GEN7_CXT_GT1_SIZE(ctx_reg)		(((ctx_reg) >> 6) & 0x7)
+#define   GEN7_CXT_VFSTATE_SIZE(ctx_reg)	(((ctx_reg) >> 0) & 0x3f)
+#define   GEN7_CXT_TOTAL_SIZE(ctx_reg)		(GEN7_CXT_EXTENDED_SIZE(ctx_reg) + \
+						 GEN7_CXT_VFSTATE_SIZE(ctx_reg))
+
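+/*
+ * The size fields above are in units of 64 bytes; e.g. the gen6 total
+ * context footprint in bytes can be derived roughly as (illustrative
+ * sketch only):
+ *
+ *	u32 reg = intel_uncore_read(uncore, CXT_SIZE);
+ *	u32 bytes = GEN6_CXT_TOTAL_SIZE(reg) * 64;
+ */
+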
+#define HSW_MI_PREDICATE_RESULT_2		_MMIO(0x2214)
+
+#define GEN9_CTX_PREEMPT_REG			_MMIO(0x2248)
+#define   GEN12_DISABLE_POSH_BUSY_FF_DOP_CG	REG_BIT(11)
+
+#define GPGPU_THREADS_DISPATCHED		_MMIO(0x2290)
+#define GPGPU_THREADS_DISPATCHED_UDW		_MMIO(0x2290 + 4)
+
+#define GEN9_RCS_FE_FSM2			_MMIO(0x22a4)
+#define GEN6_RCS_PWR_FSM			_MMIO(0x22ac)
+
+#define HS_INVOCATION_COUNT			_MMIO(0x2300)
+#define HS_INVOCATION_COUNT_UDW			_MMIO(0x2300 + 4)
+#define DS_INVOCATION_COUNT			_MMIO(0x2308)
+#define DS_INVOCATION_COUNT_UDW			_MMIO(0x2308 + 4)
+#define IA_VERTICES_COUNT			_MMIO(0x2310)
+#define IA_VERTICES_COUNT_UDW			_MMIO(0x2310 + 4)
+#define IA_PRIMITIVES_COUNT			_MMIO(0x2318)
+#define IA_PRIMITIVES_COUNT_UDW			_MMIO(0x2318 + 4)
+#define VS_INVOCATION_COUNT			_MMIO(0x2320)
+#define VS_INVOCATION_COUNT_UDW			_MMIO(0x2320 + 4)
+#define GS_INVOCATION_COUNT			_MMIO(0x2328)
+#define GS_INVOCATION_COUNT_UDW			_MMIO(0x2328 + 4)
+#define GS_PRIMITIVES_COUNT			_MMIO(0x2330)
+#define GS_PRIMITIVES_COUNT_UDW			_MMIO(0x2330 + 4)
+#define CL_INVOCATION_COUNT			_MMIO(0x2338)
+#define CL_INVOCATION_COUNT_UDW			_MMIO(0x2338 + 4)
+#define CL_PRIMITIVES_COUNT			_MMIO(0x2340)
+#define CL_PRIMITIVES_COUNT_UDW			_MMIO(0x2340 + 4)
+#define PS_INVOCATION_COUNT			_MMIO(0x2348)
+#define PS_INVOCATION_COUNT_UDW			_MMIO(0x2348 + 4)
+#define PS_DEPTH_COUNT				_MMIO(0x2350)
+#define PS_DEPTH_COUNT_UDW			_MMIO(0x2350 + 4)
+#define GEN7_3DPRIM_END_OFFSET			_MMIO(0x2420)
+#define GEN7_3DPRIM_START_VERTEX		_MMIO(0x2430)
+#define GEN7_3DPRIM_VERTEX_COUNT		_MMIO(0x2434)
+#define GEN7_3DPRIM_INSTANCE_COUNT		_MMIO(0x2438)
+#define GEN7_3DPRIM_START_INSTANCE		_MMIO(0x243c)
+#define GEN7_3DPRIM_BASE_VERTEX			_MMIO(0x2440)
+#define GEN7_GPGPU_DISPATCHDIMX			_MMIO(0x2500)
+#define GEN7_GPGPU_DISPATCHDIMY			_MMIO(0x2504)
+#define GEN7_GPGPU_DISPATCHDIMZ			_MMIO(0x2508)
+
+#define GFX_MODE				_MMIO(0x2520)
+
+#define GEN8_CS_CHICKEN1			_MMIO(0x2580)
+#define   GEN9_PREEMPT_3D_OBJECT_LEVEL		(1 << 0)
+#define   GEN9_PREEMPT_GPGPU_LEVEL(hi, lo)	(((hi) << 2) | ((lo) << 1))
+#define   GEN9_PREEMPT_GPGPU_MID_THREAD_LEVEL	GEN9_PREEMPT_GPGPU_LEVEL(0, 0)
+#define   GEN9_PREEMPT_GPGPU_THREAD_GROUP_LEVEL	GEN9_PREEMPT_GPGPU_LEVEL(0, 1)
+#define   GEN9_PREEMPT_GPGPU_COMMAND_LEVEL	GEN9_PREEMPT_GPGPU_LEVEL(1, 0)
+#define   GEN9_PREEMPT_GPGPU_LEVEL_MASK		GEN9_PREEMPT_GPGPU_LEVEL(1, 1)
+
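+/*
+ * The GPGPU preemption level above is likewise a masked field; e.g.
+ * (illustrative sketch only):
+ *
+ *	intel_uncore_write(uncore, GEN8_CS_CHICKEN1,
+ *			   _MASKED_FIELD(GEN9_PREEMPT_GPGPU_LEVEL_MASK,
+ *					 GEN9_PREEMPT_GPGPU_THREAD_GROUP_LEVEL));
+ */
+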
+#define GEN12_GLOBAL_MOCS(i)			_MMIO(0x4000 + (i) * 4) /* Global MOCS regs */
+
+#define RENDER_HWS_PGA_GEN7			_MMIO(0x4080)
+
+#define GEN8_GAMW_ECO_DEV_RW_IA			_MMIO(0x4080)
+#define   GAMW_ECO_ENABLE_64K_IPS_FIELD		0xF
+#define   GAMW_ECO_DEV_CTX_RELOAD_DISABLE	(1 << 7)
 
-#define GEN9_CS_DEBUG_MODE1		_MMIO(0x20ec)
-#define   FF_DOP_CLOCK_GATE_DISABLE	REG_BIT(1)
-#define GEN9_CTX_PREEMPT_REG		_MMIO(0x2248)
-#define   GEN12_DISABLE_POSH_BUSY_FF_DOP_CG REG_BIT(11)
+#define GAM_ECOCHK				_MMIO(0x4090)
+#define   BDW_DISABLE_HDC_INVALIDATION		(1 << 25)
+#define   ECOCHK_SNB_BIT			(1 << 10)
+#define   ECOCHK_DIS_TLB			(1 << 8)
+#define   HSW_ECOCHK_ARB_PRIO_SOL		(1 << 6)
+#define   ECOCHK_PPGTT_CACHE64B			(0x3 << 3)
+#define   ECOCHK_PPGTT_CACHE4B			(0x0 << 3)
+#define   ECOCHK_PPGTT_GFDT_IVB			(0x1 << 4)
+#define   ECOCHK_PPGTT_LLC_IVB			(0x1 << 3)
+#define   ECOCHK_PPGTT_UC_HSW			(0x1 << 3)
+#define   ECOCHK_PPGTT_WT_HSW			(0x2 << 3)
+#define   ECOCHK_PPGTT_WB_HSW			(0x3 << 3)
+
+#define GEN8_RING_FAULT_REG			_MMIO(0x4094)
+#define _RING_FAULT_REG_RCS			0x4094
+#define _RING_FAULT_REG_VCS			0x4194
+#define _RING_FAULT_REG_BCS			0x4294
+#define _RING_FAULT_REG_VECS			0x4394
+#define RING_FAULT_REG(engine)			_MMIO(_PICK((engine)->class, \
+							    _RING_FAULT_REG_RCS, \
+							    _RING_FAULT_REG_VCS, \
+							    _RING_FAULT_REG_VECS, \
+							    _RING_FAULT_REG_BCS))
+
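+/*
+ * RING_FAULT_REG() uses _PICK() to index the per-class offsets above by
+ * (engine)->class; e.g. an engine of the video decode class resolves to
+ * _RING_FAULT_REG_VCS (0x4194). Illustrative note only.
+ */
+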
+#define ERROR_GEN6				_MMIO(0x40a0)
+
+#define DONE_REG				_MMIO(0x40b0)
+#define GEN8_PRIVATE_PAT_LO			_MMIO(0x40e0)
+#define GEN8_PRIVATE_PAT_HI			_MMIO(0x40e0 + 4)
+#define GEN10_PAT_INDEX(index)			_MMIO(0x40e0 + (index) * 4)
+#define BSD_HWS_PGA_GEN7			_MMIO(0x4180)
+#define GEN12_GFX_CCS_AUX_NV			_MMIO(0x4208)
+#define GEN12_VD0_AUX_NV			_MMIO(0x4218)
+#define GEN12_VD1_AUX_NV			_MMIO(0x4228)
+
+#define GEN8_RTCR				_MMIO(0x4260)
+#define GEN8_M1TCR				_MMIO(0x4264)
+#define GEN8_M2TCR				_MMIO(0x4268)
+#define GEN8_BTCR				_MMIO(0x426c)
+#define GEN8_VTCR				_MMIO(0x4270)
+
+#define GEN12_VD2_AUX_NV			_MMIO(0x4298)
+#define GEN12_VD3_AUX_NV			_MMIO(0x42a8)
+#define GEN12_VE0_AUX_NV			_MMIO(0x4238)
+
+#define BLT_HWS_PGA_GEN7			_MMIO(0x4280)
+
+#define GEN12_VE1_AUX_NV			_MMIO(0x42b8)
+#define   AUX_INV				REG_BIT(0)
+#define VEBOX_HWS_PGA_GEN7			_MMIO(0x4380)
+
+#define GEN12_AUX_ERR_DBG			_MMIO(0x43f4)
+
+#define GEN7_TLB_RD_ADDR			_MMIO(0x4700)
+
+#define GEN12_PAT_INDEX(index)			_MMIO(0x4800 + (index) * 4)
+
+#define XEHPSDV_FLAT_CCS_BASE_ADDR		_MMIO(0x4910)
+#define   XEHPSDV_CCS_BASE_SHIFT		8
+
+#define GAMTARBMODE				_MMIO(0x4a08)
+#define   ARB_MODE_BWGTLB_DISABLE		(1 << 9)
+#define   ARB_MODE_SWIZZLE_BDW			(1 << 1)
+
+#define GEN9_GAMT_ECO_REG_RW_IA			_MMIO(0x4ab0)
+#define   GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS	(1 << 18)
 
-#define GEN12_CS_DEBUG_MODE1_CCCSUNIT_BE_COMMON		_MMIO(0x20EC)
-#define   GEN12_REPLAY_MODE_GRANULARITY			REG_BIT(0)
+#define GAMT_CHKN_BIT_REG			_MMIO(0x4ab8)
+#define   GAMT_CHKN_DISABLE_L3_COH_PIPE		(1 << 31)
+#define   GAMT_CHKN_DISABLE_DYNAMIC_CREDIT_SHARING	(1 << 28)
+#define   GAMT_CHKN_DISABLE_I2M_CYCLE_ON_WR_PORT	(1 << 24)
 
-#define GEN8_CS_CHICKEN1		_MMIO(0x2580)
-#define GEN9_PREEMPT_3D_OBJECT_LEVEL		(1 << 0)
-#define GEN9_PREEMPT_GPGPU_LEVEL(hi, lo)	(((hi) << 2) | ((lo) << 1))
-#define GEN9_PREEMPT_GPGPU_MID_THREAD_LEVEL	GEN9_PREEMPT_GPGPU_LEVEL(0, 0)
-#define GEN9_PREEMPT_GPGPU_THREAD_GROUP_LEVEL	GEN9_PREEMPT_GPGPU_LEVEL(0, 1)
-#define GEN9_PREEMPT_GPGPU_COMMAND_LEVEL	GEN9_PREEMPT_GPGPU_LEVEL(1, 0)
-#define GEN9_PREEMPT_GPGPU_LEVEL_MASK		GEN9_PREEMPT_GPGPU_LEVEL(1, 1)
+#define GEN8_FAULT_TLB_DATA0			_MMIO(0x4b10)
+#define GEN8_FAULT_TLB_DATA1			_MMIO(0x4b14)
 
-/* GEN7 chicken */
-#define GEN7_COMMON_SLICE_CHICKEN1		_MMIO(0x7010)
-  #define GEN7_CSC1_RHWO_OPT_DISABLE_IN_RCC	(1 << 10)
-  #define GEN9_RHWO_OPTIMIZATION_DISABLE	(1 << 14)
+#define GEN11_GACB_PERF_CTRL			_MMIO(0x4b80)
+#define   GEN11_HASH_CTRL_MASK			(0x3 << 12 | 0xf << 0)
+#define   GEN11_HASH_CTRL_BIT0			(1 << 0)
+#define   GEN11_HASH_CTRL_BIT4			(1 << 12)
 
-#define COMMON_SLICE_CHICKEN2					_MMIO(0x7014)
-  #define GEN9_PBE_COMPRESSED_HASH_SELECTION			(1 << 13)
-  #define GEN9_DISABLE_GATHER_AT_SET_SHADER_COMMON_SLICE	(1 << 12)
-  #define GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION		(1 << 8)
-  #define GEN8_CSC2_SBE_VUE_CACHE_CONSERVATIVE			(1 << 0)
+/* gamt regs */
+#define GEN8_L3_LRA_1_GPGPU			_MMIO(0x4dd4)
+#define   GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_BDW	0x67F1427F /* max/min for LRA1/2 */
+#define   GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_CHV	0x5FF101FF /* max/min for LRA1/2 */
+#define   GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_SKL	0x67F1427F /* max/min for LRA1/2 */
+#define   GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_BXT	0x5FF101FF /* max/min for LRA1/2 */
 
-#define GEN8_L3CNTLREG	_MMIO(0x7034)
-  #define GEN8_ERRDETBCTRL (1 << 9)
+#define MMCD_MISC_CTRL				_MMIO(0x4ddc) /* skl+ */
+#define   MMCD_PCLA				(1 << 31)
+#define   MMCD_HOTSPOT_EN			(1 << 27)
 
-#define GEN11_COMMON_SLICE_CHICKEN3			_MMIO(0x7304)
-#define   DG1_FLOAT_POINT_BLEND_OPT_STRICT_MODE_EN	REG_BIT(12)
-#define   XEHP_DUAL_SIMD8_SEQ_MERGE_DISABLE		REG_BIT(12)
-#define   GEN11_BLEND_EMB_FIX_DISABLE_IN_RCC		REG_BIT(11)
-#define   GEN12_DISABLE_CPS_AWARE_COLOR_PIPE		REG_BIT(9)
+/* There are 4 64-bit counter registers, one for each stream output */
+#define GEN7_SO_NUM_PRIMS_WRITTEN(n)		_MMIO(0x5200 + (n) * 8)
+#define GEN7_SO_NUM_PRIMS_WRITTEN_UDW(n)	_MMIO(0x5200 + (n) * 8 + 4)
 
-#define HIZ_CHICKEN					_MMIO(0x7018)
-# define CHV_HZ_8X8_MODE_IN_1X				REG_BIT(15)
-# define DG1_HZ_READ_SUPPRESSION_OPTIMIZATION_DISABLE   REG_BIT(14)
-# define BDW_HIZ_POWER_COMPILER_CLOCK_GATING_DISABLE	REG_BIT(3)
+#define GEN7_SO_PRIM_STORAGE_NEEDED(n)		_MMIO(0x5240 + (n) * 8)
+#define GEN7_SO_PRIM_STORAGE_NEEDED_UDW(n)	_MMIO(0x5240 + (n) * 8 + 4)
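+
+/*
+ * Each of these counters is 64 bits wide, split across two 32-bit
+ * registers; a read sketch (illustrative only, ignoring wraparound
+ * between the two halves):
+ *
+ *	u32 lo = intel_uncore_read(uncore, GEN7_SO_NUM_PRIMS_WRITTEN(stream));
+ *	u32 hi = intel_uncore_read(uncore, GEN7_SO_NUM_PRIMS_WRITTEN_UDW(stream));
+ *	u64 count = ((u64)hi << 32) | lo;
+ */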
 
-#define GEN9_SLICE_COMMON_ECO_CHICKEN0		_MMIO(0x7308)
-#define  DISABLE_PIXEL_MASK_CAMMING		(1 << 14)
+#define GEN9_WM_CHICKEN3			_MMIO(0x5588)
+#define   GEN9_FACTOR_IN_CLR_VAL_HIZ		(1 << 9)
 
-#define GEN9_SLICE_COMMON_ECO_CHICKEN1		_MMIO(0x731c)
-#define   GEN11_STATE_CACHE_REDIRECT_TO_CS	(1 << 11)
+#define VFLSKPD					_MMIO(0x62a8)
+#define   DIS_OVER_FETCH_CACHE			REG_BIT(1)
+#define   DIS_MULT_MISS_RD_SQUASH		REG_BIT(0)
 
-#define GEN7_SARCHKMD				_MMIO(0xB000)
-#define GEN7_DISABLE_DEMAND_PREFETCH		(1 << 31)
-#define GEN7_DISABLE_SAMPLER_PREFETCH           (1 << 30)
+#define FF_MODE2				_MMIO(0x6604)
+#define   FF_MODE2_GS_TIMER_MASK		REG_GENMASK(31, 24)
+#define   FF_MODE2_GS_TIMER_224			REG_FIELD_PREP(FF_MODE2_GS_TIMER_MASK, 224)
+#define   FF_MODE2_TDS_TIMER_MASK		REG_GENMASK(23, 16)
+#define   FF_MODE2_TDS_TIMER_128		REG_FIELD_PREP(FF_MODE2_TDS_TIMER_MASK, 4)
 
-#define GEN7_L3SQCREG1				_MMIO(0xB010)
-#define  VLV_B0_WA_L3SQCREG1_VALUE		0x00D30000
+#define XEHPG_INSTDONE_GEOM_SVG			_MMIO(0x666c)
 
-#define GEN8_L3SQCREG1				_MMIO(0xB100)
-/*
- * Note that on CHV the following has an off-by-one error wrt. to BSpec.
- * Using the formula in BSpec leads to a hang, while the formula here works
- * fine and matches the formulas for all other platforms. A BSpec change
- * request has been filed to clarify this.
- */
-#define  L3_GENERAL_PRIO_CREDITS(x)		(((x) >> 1) << 19)
-#define  L3_HIGH_PRIO_CREDITS(x)		(((x) >> 1) << 14)
-#define  L3_PRIO_CREDITS_MASK			((0x1f << 19) | (0x1f << 14))
+#define CACHE_MODE_0_GEN7			_MMIO(0x7000) /* IVB+ */
+#define   RC_OP_FLUSH_ENABLE			(1 << 0)
+#define   HIZ_RAW_STALL_OPT_DISABLE		(1 << 2)
+#define CACHE_MODE_1				_MMIO(0x7004) /* IVB+ */
+#define   PIXEL_SUBSPAN_COLLECT_OPT_DISABLE	(1 << 6)
+#define   GEN8_4x4_STC_OPTIMIZATION_DISABLE	(1 << 6)
+#define   GEN9_PARTIAL_RESOLVE_IN_VC_DISABLE	(1 << 1)
 
-#define GEN7_L3CNTLREG1				_MMIO(0xB01C)
-#define  GEN7_WA_FOR_GEN7_L3_CONTROL			0x3C47FF8C
-#define  GEN7_L3AGDIS				(1 << 19)
-#define GEN7_L3CNTLREG2				_MMIO(0xB020)
-#define GEN7_L3CNTLREG3				_MMIO(0xB024)
+#define GEN7_GT_MODE				_MMIO(0x7008)
+#define   GEN9_IZ_HASHING_MASK(slice)		(0x3 << ((slice) * 2))
+#define   GEN9_IZ_HASHING(slice, val)		((val) << ((slice) * 2))
 
-#define GEN7_L3_CHICKEN_MODE_REGISTER		_MMIO(0xB030)
-#define   GEN7_WA_L3_CHICKEN_MODE		0x20000000
-#define GEN10_L3_CHICKEN_MODE_REGISTER		_MMIO(0xB114)
-#define   GEN11_I2M_WRITE_DISABLE		(1 << 28)
+/* GEN7 chicken */
+#define GEN7_COMMON_SLICE_CHICKEN1		_MMIO(0x7010)
+#define   GEN7_CSC1_RHWO_OPT_DISABLE_IN_RCC	(1 << 10)
+#define   GEN9_RHWO_OPTIMIZATION_DISABLE	(1 << 14)
 
-#define GEN7_L3SQCREG4				_MMIO(0xb034)
-#define  L3SQ_URB_READ_CAM_MATCH_DISABLE	(1 << 27)
+#define COMMON_SLICE_CHICKEN2			_MMIO(0x7014)
+#define   GEN9_PBE_COMPRESSED_HASH_SELECTION	(1 << 13)
+#define   GEN9_DISABLE_GATHER_AT_SET_SHADER_COMMON_SLICE	(1 << 12)
+#define   GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION	(1 << 8)
+#define   GEN8_CSC2_SBE_VUE_CACHE_CONSERVATIVE	(1 << 0)
 
-#define GEN11_SCRATCH2					_MMIO(0xb140)
-#define  GEN11_COHERENT_PARTIAL_WRITE_MERGE_ENABLE	(1 << 19)
+#define HIZ_CHICKEN				_MMIO(0x7018)
+#define   CHV_HZ_8X8_MODE_IN_1X			REG_BIT(15)
+#define   DG1_HZ_READ_SUPPRESSION_OPTIMIZATION_DISABLE	REG_BIT(14)
+#define   BDW_HIZ_POWER_COMPILER_CLOCK_GATING_DISABLE	REG_BIT(3)
 
-#define GEN8_L3SQCREG4				_MMIO(0xb118)
-#define  GEN11_LQSC_CLEAN_EVICT_DISABLE		(1 << 6)
-#define  GEN8_LQSC_RO_PERF_DIS			(1 << 27)
-#define  GEN8_LQSC_FLUSH_COHERENT_LINES		(1 << 21)
-#define  GEN8_LQSQ_NONIA_COHERENT_ATOMICS_ENABLE REG_BIT(22)
+#define GEN8_L3CNTLREG				_MMIO(0x7034)
+#define   GEN8_ERRDETBCTRL			(1 << 9)
 
-#define GEN11_L3SQCREG5				_MMIO(0xb158)
-#define   L3_PWM_TIMER_INIT_VAL_MASK		REG_GENMASK(9, 0)
-
-#define XEHP_L3SCQREG7				_MMIO(0xb188)
-#define   BLEND_FILL_CACHING_OPT_DIS		REG_BIT(3)
+#define GEN7_SC_INSTDONE			_MMIO(0x7100)
+#define GEN12_SC_INSTDONE_EXTRA			_MMIO(0x7104)
+#define GEN12_SC_INSTDONE_EXTRA2		_MMIO(0x7108)
 
 /* GEN8 chicken */
 #define HDC_CHICKEN0				_MMIO(0x7300)
-#define ICL_HDC_MODE				_MMIO(0xE5F4)
-#define  HDC_FORCE_CSR_NON_COHERENT_OVR_DISABLE	(1 << 15)
-#define  HDC_FENCE_DEST_SLM_DISABLE		(1 << 14)
-#define  HDC_DONOT_FETCH_MEM_WHEN_MASKED	(1 << 11)
-#define  HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT	(1 << 5)
-#define  HDC_FORCE_NON_COHERENT			(1 << 4)
-#define  HDC_BARRIER_PERFORMANCE_DISABLE	(1 << 10)
-
-#define GEN12_HDC_CHICKEN0					_MMIO(0xE5F0)
-#define   LSC_L1_FLUSH_CTL_3D_DATAPORT_FLUSH_EVENTS_MASK	REG_GENMASK(13, 11)
-
-#define SARB_CHICKEN1				_MMIO(0xe90c)
-#define   COMP_CKN_IN				REG_GENMASK(30, 29)
+#define   HDC_FORCE_CSR_NON_COHERENT_OVR_DISABLE	(1 << 15)
+#define   HDC_FENCE_DEST_SLM_DISABLE		(1 << 14)
+#define   HDC_DONOT_FETCH_MEM_WHEN_MASKED	(1 << 11)
+#define   HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT	(1 << 5)
+#define   HDC_FORCE_NON_COHERENT		(1 << 4)
+#define   HDC_BARRIER_PERFORMANCE_DISABLE	(1 << 10)
 
 #define GEN8_HDC_CHICKEN1			_MMIO(0x7304)
 
+#define GEN11_COMMON_SLICE_CHICKEN3		_MMIO(0x7304)
+#define   DG1_FLOAT_POINT_BLEND_OPT_STRICT_MODE_EN	REG_BIT(12)
+#define   XEHP_DUAL_SIMD8_SEQ_MERGE_DISABLE	REG_BIT(12)
+#define   GEN11_BLEND_EMB_FIX_DISABLE_IN_RCC	REG_BIT(11)
+#define   GEN12_DISABLE_CPS_AWARE_COLOR_PIPE	REG_BIT(9)
+
 /* GEN9 chicken */
 #define SLICE_ECO_CHICKEN0			_MMIO(0x7308)
 #define   PIXEL_MASK_CAMMING_DISABLE		(1 << 14)
 
-#define GEN9_WM_CHICKEN3			_MMIO(0x5588)
-#define   GEN9_FACTOR_IN_CLR_VAL_HIZ		(1 << 9)
+#define GEN9_SLICE_COMMON_ECO_CHICKEN0		_MMIO(0x7308)
+#define   DISABLE_PIXEL_MASK_CAMMING		(1 << 14)
+
+#define GEN9_SLICE_COMMON_ECO_CHICKEN1		_MMIO(0x731c)
+#define   GEN11_STATE_CACHE_REDIRECT_TO_CS	(1 << 11)
+
+#define SLICE_COMMON_ECO_CHICKEN1		_MMIO(0x731c)
+#define   MSC_MSAA_REODER_BUF_BYPASS_DISABLE	REG_BIT(14)
+
+#define GEN9_SLICE_PGCTL_ACK(slice)		_MMIO(0x804c + (slice) * 0x4)
+#define GEN10_SLICE_PGCTL_ACK(slice)		_MMIO(0x804c + ((slice) / 3) * 0x34 + \
+						      ((slice) % 3) * 0x4)
+#define   GEN9_PGCTL_SLICE_ACK			(1 << 0)
+#define   GEN9_PGCTL_SS_ACK(subslice)		(1 << (2 + (subslice) * 2))
+#define   GEN10_PGCTL_VALID_SS_MASK(slice)	((slice) == 0 ? 0x7F : 0x1F)
+
+#define GEN9_SS01_EU_PGCTL_ACK(slice)		_MMIO(0x805c + (slice) * 0x8)
+#define GEN10_SS01_EU_PGCTL_ACK(slice)		_MMIO(0x805c + ((slice) / 3) * 0x30 + \
+						      ((slice) % 3) * 0x8)
+#define GEN9_SS23_EU_PGCTL_ACK(slice)		_MMIO(0x8060 + (slice) * 0x8)
+#define GEN10_SS23_EU_PGCTL_ACK(slice)		_MMIO(0x8060 + ((slice) / 3) * 0x30 + \
+						      ((slice) % 3) * 0x8)
+#define   GEN9_PGCTL_SSA_EU08_ACK		(1 << 0)
+#define   GEN9_PGCTL_SSA_EU19_ACK		(1 << 2)
+#define   GEN9_PGCTL_SSA_EU210_ACK		(1 << 4)
+#define   GEN9_PGCTL_SSA_EU311_ACK		(1 << 6)
+#define   GEN9_PGCTL_SSB_EU08_ACK		(1 << 8)
+#define   GEN9_PGCTL_SSB_EU19_ACK		(1 << 10)
+#define   GEN9_PGCTL_SSB_EU210_ACK		(1 << 12)
+#define   GEN9_PGCTL_SSB_EU311_ACK		(1 << 14)
+
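+/*
+ * Worked example for the gen10 grouping above (illustrative only):
+ * slices are laid out three to a group, so GEN10_SLICE_PGCTL_ACK(4) is
+ * 0x804c + (4 / 3) * 0x34 + (4 % 3) * 0x4 = 0x8084.
+ */
+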
+#define GEN8_RC6_CTX_INFO			_MMIO(0x8504)
+
+#define GEN12_SQCM				_MMIO(0x8724)
+#define   EN_32B_ACCESS				REG_BIT(30)
+
+#define HSW_IDICR				_MMIO(0x9008)
+#define   IDIHASHMSK(x)				(((x) & 0x3f) << 16)
+
+#define GEN6_MBCUNIT_SNPCR			_MMIO(0x900c) /* for LLC config */
+#define   GEN6_MBC_SNPCR_SHIFT			21
+#define   GEN6_MBC_SNPCR_MASK			(3 << 21)
+#define   GEN6_MBC_SNPCR_MAX			(0 << 21)
+#define   GEN6_MBC_SNPCR_MED			(1 << 21)
+#define   GEN6_MBC_SNPCR_LOW			(2 << 21)
+#define   GEN6_MBC_SNPCR_MIN			(3 << 21) /* only 1/16th of the cache is shared */
+
+#define VLV_G3DCTL				_MMIO(0x9024)
+#define VLV_GSCKGCTL				_MMIO(0x9028)
 
 /* WaCatErrorRejectionIssue */
 #define GEN7_SQ_CHICKEN_MBCUNIT_CONFIG		_MMIO(0x9030)
-#define  GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB	(1 << 11)
+#define   GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB	(1 << 11)
 
-#define HSW_SCRATCH1				_MMIO(0xb038)
-#define  HSW_SCRATCH1_L3_DATA_ATOMICS_DISABLE	(1 << 27)
-
-#define BDW_SCRATCH1					_MMIO(0xb11c)
-#define  GEN9_LBS_SLA_RETRY_TIMER_DECREMENT_ENABLE	(1 << 2)
-
-#define VFLSKPD				_MMIO(0x62a8)
-#define   DIS_OVER_FETCH_CACHE		REG_BIT(1)
-#define   DIS_MULT_MISS_RD_SQUASH	REG_BIT(0)
-
-#define FF_MODE2			_MMIO(0x6604)
-#define   FF_MODE2_GS_TIMER_MASK	REG_GENMASK(31, 24)
-#define   FF_MODE2_GS_TIMER_224		REG_FIELD_PREP(FF_MODE2_GS_TIMER_MASK, 224)
-#define   FF_MODE2_TDS_TIMER_MASK	REG_GENMASK(23, 16)
-#define   FF_MODE2_TDS_TIMER_128	REG_FIELD_PREP(FF_MODE2_TDS_TIMER_MASK, 4)
-
-#define  RC6_LOCATION				_MMIO(0xD40)
-#define	   RC6_CTX_IN_DRAM			(1 << 0)
-#define  RC6_CTX_BASE				_MMIO(0xD48)
-#define    RC6_CTX_BASE_MASK			0xFFFFFFF0
-#define  FORCEWAKE				_MMIO(0xA18C)
-#define  FORCEWAKE_VLV				_MMIO(0x1300b0)
-#define  FORCEWAKE_ACK_VLV			_MMIO(0x1300b4)
-#define  FORCEWAKE_MEDIA_VLV			_MMIO(0x1300b8)
-#define  FORCEWAKE_ACK_MEDIA_VLV		_MMIO(0x1300bc)
-#define  FORCEWAKE_ACK_HSW			_MMIO(0x130044)
-#define  FORCEWAKE_ACK				_MMIO(0x130090)
-#define  VLV_GTLC_WAKE_CTRL			_MMIO(0x130090)
-#define   VLV_GTLC_RENDER_CTX_EXISTS		(1 << 25)
-#define   VLV_GTLC_MEDIA_CTX_EXISTS		(1 << 24)
-#define   VLV_GTLC_ALLOWWAKEREQ			(1 << 0)
+#define FBC_LLC_READ_CTRL			_MMIO(0x9044)
+#define   FBC_LLC_FULLY_OPEN			REG_BIT(30)
 
-#define  VLV_GTLC_PW_STATUS			_MMIO(0x130094)
-#define   VLV_GTLC_ALLOWWAKEACK			(1 << 0)
-#define   VLV_GTLC_ALLOWWAKEERR			(1 << 1)
-#define   VLV_GTLC_PW_MEDIA_STATUS_MASK		(1 << 5)
-#define   VLV_GTLC_PW_RENDER_STATUS_MASK	(1 << 7)
-#define  FORCEWAKE_MT				_MMIO(0xa188) /* multi-threaded */
-#define  FORCEWAKE_MEDIA_GEN9			_MMIO(0xa270)
-#define  FORCEWAKE_MEDIA_VDBOX_GEN11(n)		_MMIO(0xa540 + (n) * 4)
-#define  FORCEWAKE_MEDIA_VEBOX_GEN11(n)		_MMIO(0xa560 + (n) * 4)
-#define  FORCEWAKE_RENDER_GEN9			_MMIO(0xa278)
-#define  FORCEWAKE_GT_GEN9			_MMIO(0xa188)
-#define  FORCEWAKE_ACK_MEDIA_GEN9		_MMIO(0x0D88)
-#define  FORCEWAKE_ACK_MEDIA_VDBOX_GEN11(n)	_MMIO(0x0D50 + (n) * 4)
-#define  FORCEWAKE_ACK_MEDIA_VEBOX_GEN11(n)	_MMIO(0x0D70 + (n) * 4)
-#define  FORCEWAKE_ACK_RENDER_GEN9		_MMIO(0x0D84)
-#define  FORCEWAKE_ACK_GT_GEN9			_MMIO(0x130044)
-#define   FORCEWAKE_KERNEL			BIT(0)
-#define   FORCEWAKE_USER			BIT(1)
-#define   FORCEWAKE_KERNEL_FALLBACK		BIT(15)
-#define  FORCEWAKE_MT_ACK			_MMIO(0x130040)
-#define  ECOBUS					_MMIO(0xa180)
-#define    FORCEWAKE_MT_ENABLE			(1 << 5)
-#define  VLV_SPAREG2H				_MMIO(0xA194)
-#define  GEN9_PWRGT_DOMAIN_STATUS		_MMIO(0xA2A0)
-#define   GEN9_PWRGT_MEDIA_STATUS_MASK		(1 << 0)
-#define   GEN9_PWRGT_RENDER_STATUS_MASK		(1 << 1)
+#define GEN6_MBCTL				_MMIO(0x907c)
+#define   GEN6_MBCTL_ENABLE_BOOT_FETCH		(1 << 4)
+#define   GEN6_MBCTL_CTX_FETCH_NEEDED		(1 << 3)
+#define   GEN6_MBCTL_BME_UPDATE_ENABLE		(1 << 2)
+#define   GEN6_MBCTL_MAE_UPDATE_ENABLE		(1 << 1)
+#define   GEN6_MBCTL_BOOT_FETCH_MECH		(1 << 0)
 
-#define  GTFIFODBG				_MMIO(0x120000)
-#define    GT_FIFO_SBDEDICATE_FREE_ENTRY_CHV	(0x1f << 20)
-#define    GT_FIFO_FREE_ENTRIES_CHV		(0x7f << 13)
-#define    GT_FIFO_SBDROPERR			(1 << 6)
-#define    GT_FIFO_BLOBDROPERR			(1 << 5)
-#define    GT_FIFO_SB_READ_ABORTERR		(1 << 4)
-#define    GT_FIFO_DROPERR			(1 << 3)
-#define    GT_FIFO_OVFERR			(1 << 2)
-#define    GT_FIFO_IAWRERR			(1 << 1)
-#define    GT_FIFO_IARDERR			(1 << 0)
-
-#define  GTFIFOCTL				_MMIO(0x120008)
-#define    GT_FIFO_FREE_ENTRIES_MASK		0x7f
-#define    GT_FIFO_NUM_RESERVED_ENTRIES		20
-#define    GT_FIFO_CTL_BLOCK_ALL_POLICY_STALL	(1 << 12)
-#define    GT_FIFO_CTL_RC6_POLICY_STALL		(1 << 11)
-
-#define  HSW_IDICR				_MMIO(0x9008)
-#define    IDIHASHMSK(x)			(((x) & 0x3f) << 16)
+/* Fuse readout registers for GT */
+#define GEN10_MIRROR_FUSE3			_MMIO(0x9118)
+#define   GEN10_L3BANK_PAIR_COUNT		4
+#define   GEN10_L3BANK_MASK			0x0F
+/* on Xe_HP the same fuses indicate mslices instead of L3 banks */
+#define   GEN12_MAX_MSLICES			4
+#define   GEN12_MEML3_EN_MASK			0x0F
+
+#define HSW_PAVP_FUSE1				_MMIO(0x911c)
+#define   XEHP_SFC_ENABLE_MASK			REG_GENMASK(27, 24)
+#define   HSW_F1_EU_DIS_MASK			REG_GENMASK(17, 16)
+#define   HSW_F1_EU_DIS_10EUS			0
+#define   HSW_F1_EU_DIS_8EUS			1
+#define   HSW_F1_EU_DIS_6EUS			2
+
+#define GEN8_FUSE2				_MMIO(0x9120)
+#define   GEN8_F2_SS_DIS_SHIFT			21
+#define   GEN8_F2_SS_DIS_MASK			(0x7 << GEN8_F2_SS_DIS_SHIFT)
+#define   GEN8_F2_S_ENA_SHIFT			25
+#define   GEN8_F2_S_ENA_MASK			(0x7 << GEN8_F2_S_ENA_SHIFT)
+#define   GEN9_F2_SS_DIS_SHIFT			20
+#define   GEN9_F2_SS_DIS_MASK			(0xf << GEN9_F2_SS_DIS_SHIFT)
+#define   GEN10_F2_S_ENA_SHIFT			22
+#define   GEN10_F2_S_ENA_MASK			(0x3f << GEN10_F2_S_ENA_SHIFT)
+#define   GEN10_F2_SS_DIS_SHIFT			18
+#define   GEN10_F2_SS_DIS_MASK			(0xf << GEN10_F2_SS_DIS_SHIFT)
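
The GEN8_FUSE2 fields above are enable/disable fuse masks: the driver reads the register once at init and derives slice/subslice topology by masking, shifting, and counting bits. A standalone user-space sketch of that decode (illustration only, not driver code; the fuse value is fabricated and __builtin_popcount is a GCC/Clang builtin):

#include <stdio.h>
#include <stdint.h>

/* field layout copied from GEN8_FUSE2 above */
#define GEN8_F2_S_ENA_SHIFT	25
#define GEN8_F2_S_ENA_MASK	(0x7 << GEN8_F2_S_ENA_SHIFT)

int main(void)
{
	uint32_t fuse2 = 0x06000000;	/* fabricated fuse value */
	uint32_t s_ena = (fuse2 & GEN8_F2_S_ENA_MASK) >> GEN8_F2_S_ENA_SHIFT;

	/* popcount of the enable mask gives the number of usable slices */
	printf("slice enable mask %#x -> %d slice(s)\n",
	       s_ena, __builtin_popcount(s_ena));
	return 0;
}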
+
+#define GEN8_EU_DISABLE0			_MMIO(0x9134)
+#define GEN9_EU_DISABLE(slice)			_MMIO(0x9134 + (slice) * 0x4)
+#define GEN11_EU_DISABLE			_MMIO(0x9134)
+#define   GEN8_EU_DIS0_S0_MASK			0xffffff
+#define   GEN8_EU_DIS0_S1_SHIFT			24
+#define   GEN8_EU_DIS0_S1_MASK			(0xff << GEN8_EU_DIS0_S1_SHIFT)
+#define   GEN11_EU_DIS_MASK			0xFF
+#define XEHP_EU_ENABLE				_MMIO(0x9134)
+#define   XEHP_EU_ENA_MASK			0xFF
+
+#define GEN8_EU_DISABLE1			_MMIO(0x9138)
+#define   GEN8_EU_DIS1_S1_MASK			0xffff
+#define   GEN8_EU_DIS1_S2_SHIFT			16
+#define   GEN8_EU_DIS1_S2_MASK			(0xffff << GEN8_EU_DIS1_S2_SHIFT)
+
+#define GEN11_GT_SLICE_ENABLE			_MMIO(0x9138)
+#define   GEN11_GT_S_ENA_MASK			0xFF
+
+#define GEN8_EU_DISABLE2			_MMIO(0x913c)
+#define   GEN8_EU_DIS2_S2_MASK			0xff
+
+#define GEN11_GT_SUBSLICE_DISABLE		_MMIO(0x913c)
+#define GEN12_GT_GEOMETRY_DSS_ENABLE		_MMIO(0x913c)
+
+#define GEN10_EU_DISABLE3			_MMIO(0x9140)
+#define   GEN10_EU_DIS_SS_MASK			0xff
+#define GEN11_GT_VEBOX_VDBOX_DISABLE		_MMIO(0x9140)
+#define   GEN11_GT_VDBOX_DISABLE_MASK		0xff
+#define   GEN11_GT_VEBOX_DISABLE_SHIFT		16
+#define   GEN11_GT_VEBOX_DISABLE_MASK		(0x0f << GEN11_GT_VEBOX_DISABLE_SHIFT)
+
+#define GEN12_GT_COMPUTE_DSS_ENABLE		_MMIO(0x9144)
 
 #define GEN6_UCGCTL1				_MMIO(0x9400)
-# define GEN6_GAMUNIT_CLOCK_GATE_DISABLE		(1 << 22)
-# define GEN6_EU_TCUNIT_CLOCK_GATE_DISABLE		(1 << 16)
-# define GEN6_BLBUNIT_CLOCK_GATE_DISABLE		(1 << 5)
-# define GEN6_CSUNIT_CLOCK_GATE_DISABLE			(1 << 7)
+#define   GEN6_GAMUNIT_CLOCK_GATE_DISABLE	(1 << 22)
+#define   GEN6_EU_TCUNIT_CLOCK_GATE_DISABLE	(1 << 16)
+#define   GEN6_BLBUNIT_CLOCK_GATE_DISABLE	(1 << 5)
+#define   GEN6_CSUNIT_CLOCK_GATE_DISABLE	(1 << 7)
 
 #define GEN6_UCGCTL2				_MMIO(0x9404)
-# define GEN6_VFUNIT_CLOCK_GATE_DISABLE			(1 << 31)
-# define GEN7_VDSUNIT_CLOCK_GATE_DISABLE		(1 << 30)
-# define GEN7_TDLUNIT_CLOCK_GATE_DISABLE		(1 << 22)
-# define GEN6_RCZUNIT_CLOCK_GATE_DISABLE		(1 << 13)
-# define GEN6_RCPBUNIT_CLOCK_GATE_DISABLE		(1 << 12)
-# define GEN6_RCCUNIT_CLOCK_GATE_DISABLE		(1 << 11)
+#define   GEN6_VFUNIT_CLOCK_GATE_DISABLE	(1 << 31)
+#define   GEN7_VDSUNIT_CLOCK_GATE_DISABLE	(1 << 30)
+#define   GEN7_TDLUNIT_CLOCK_GATE_DISABLE	(1 << 22)
+#define   GEN6_RCZUNIT_CLOCK_GATE_DISABLE	(1 << 13)
+#define   GEN6_RCPBUNIT_CLOCK_GATE_DISABLE	(1 << 12)
+#define   GEN6_RCCUNIT_CLOCK_GATE_DISABLE	(1 << 11)
 
 #define GEN6_UCGCTL3				_MMIO(0x9408)
-# define GEN6_OACSUNIT_CLOCK_GATE_DISABLE		(1 << 20)
+#define   GEN6_OACSUNIT_CLOCK_GATE_DISABLE	(1 << 20)
 
 #define GEN7_UCGCTL4				_MMIO(0x940c)
-#define  GEN7_L3BANK2X_CLOCK_GATE_DISABLE	(1 << 25)
-#define  GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE	(1 << 14)
+#define   GEN7_L3BANK2X_CLOCK_GATE_DISABLE	(1 << 25)
+#define   GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE	(1 << 14)
 
 #define GEN6_RCGCTL1				_MMIO(0x9410)
 #define GEN6_RCGCTL2				_MMIO(0x9414)
+
+#define GEN6_GDRST				_MMIO(0x941c)
+#define   GEN6_GRDOM_FULL			(1 << 0)
+#define   GEN6_GRDOM_RENDER			(1 << 1)
+#define   GEN6_GRDOM_MEDIA			(1 << 2)
+#define   GEN6_GRDOM_BLT			(1 << 3)
+#define   GEN6_GRDOM_VECS			(1 << 4)
+#define   GEN9_GRDOM_GUC			(1 << 5)
+#define   GEN8_GRDOM_MEDIA2			(1 << 7)
+/* GEN11 changed all bit defs except for FULL & RENDER */
+#define   GEN11_GRDOM_FULL			GEN6_GRDOM_FULL
+#define   GEN11_GRDOM_RENDER			GEN6_GRDOM_RENDER
+#define   GEN11_GRDOM_BLT			(1 << 2)
+#define   GEN11_GRDOM_GUC			(1 << 3)
+#define   GEN11_GRDOM_MEDIA			(1 << 5)
+#define   GEN11_GRDOM_MEDIA2			(1 << 6)
+#define   GEN11_GRDOM_MEDIA3			(1 << 7)
+#define   GEN11_GRDOM_MEDIA4			(1 << 8)
+#define   GEN11_GRDOM_MEDIA5			(1 << 9)
+#define   GEN11_GRDOM_MEDIA6			(1 << 10)
+#define   GEN11_GRDOM_MEDIA7			(1 << 11)
+#define   GEN11_GRDOM_MEDIA8			(1 << 12)
+#define   GEN11_GRDOM_VECS			(1 << 13)
+#define   GEN11_GRDOM_VECS2			(1 << 14)
+#define   GEN11_GRDOM_VECS3			(1 << 15)
+#define   GEN11_GRDOM_VECS4			(1 << 16)
+#define   GEN11_GRDOM_SFC0			(1 << 17)
+#define   GEN11_GRDOM_SFC1			(1 << 18)
+#define   GEN11_GRDOM_SFC2			(1 << 19)
+#define   GEN11_GRDOM_SFC3			(1 << 20)
+#define   GEN11_VCS_SFC_RESET_BIT(instance)	(GEN11_GRDOM_SFC0 << ((instance) >> 1))
+#define   GEN11_VECS_SFC_RESET_BIT(instance)	(GEN11_GRDOM_SFC0 << (instance))
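
The two SFC reset-bit helpers encode how the Scaler and Format Converter (SFC) units are shared: each SFC serves a pair of VCS engines (hence the instance >> 1) but a single VECS engine. A standalone sketch of the mapping, with GEN11_GRDOM_SFC0 copied from the GEN6_GDRST block above (the demo itself is illustrative, not driver code):

#include <stdio.h>

#define GEN11_GRDOM_SFC0	(1 << 17)	/* copied from GEN6_GDRST above */

/* a VCS pair shares one SFC; each VECS has its own */
#define VCS_SFC_RESET_BIT(instance)	(GEN11_GRDOM_SFC0 << ((instance) >> 1))
#define VECS_SFC_RESET_BIT(instance)	(GEN11_GRDOM_SFC0 << (instance))

int main(void)
{
	int i;

	for (i = 0; i < 4; i++)
		printf("VCS%d  -> SFC reset bit %#x\n", i, VCS_SFC_RESET_BIT(i));
	for (i = 0; i < 2; i++)
		printf("VECS%d -> SFC reset bit %#x\n", i, VECS_SFC_RESET_BIT(i));
	return 0;
}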
+
 #define GEN6_RSTCTL				_MMIO(0x9420)
 
+#define GEN7_MISCCPCTL				_MMIO(0x9424)
+#define   GEN7_DOP_CLOCK_GATE_ENABLE		(1 << 0)
+#define   GEN8_DOP_CLOCK_GATE_CFCLK_ENABLE	(1 << 2)
+#define   GEN8_DOP_CLOCK_GATE_GUC_ENABLE	(1 << 4)
+#define   GEN8_DOP_CLOCK_GATE_MEDIA_ENABLE	(1 << 6)
+
 #define GEN8_UCGCTL6				_MMIO(0x9430)
 #define   GEN8_GAPSUNIT_CLOCK_GATE_DISABLE	(1 << 24)
 #define   GEN8_SDEUNIT_CLOCK_GATE_DISABLE	(1 << 14)
-#define   GEN8_HDCUNIT_CLOCK_GATE_DISABLE_HDCREQ (1 << 28)
+#define   GEN8_HDCUNIT_CLOCK_GATE_DISABLE_HDCREQ	(1 << 28)
 
 #define UNSLCGCTL9430				_MMIO(0x9430)
 #define   MSQDUNIT_CLKGATE_DIS			REG_BIT(3)
 
-#define GEN6_GFXPAUSE				_MMIO(0xA000)
-#define GEN6_RPNSWREQ				_MMIO(0xA008)
+#define UNSLICE_UNIT_LEVEL_CLKGATE		_MMIO(0x9434)
+#define   VFUNIT_CLKGATE_DIS			REG_BIT(20)
+#define   TSGUNIT_CLKGATE_DIS			REG_BIT(17) /* XEHPSDV */
+#define   CG3DDISCFEG_CLKGATE_DIS		REG_BIT(17) /* DG2 */
+#define   GAMEDIA_CLKGATE_DIS			REG_BIT(11)
+#define   HSUNIT_CLKGATE_DIS			REG_BIT(8)
+#define   VSUNIT_CLKGATE_DIS			REG_BIT(3)
+
+#define UNSLCGCTL9440				_MMIO(0x9440)
+#define   GAMTLBOACS_CLKGATE_DIS		REG_BIT(28)
+#define   GAMTLBVDBOX5_CLKGATE_DIS		REG_BIT(27)
+#define   GAMTLBVDBOX6_CLKGATE_DIS		REG_BIT(26)
+#define   GAMTLBVDBOX3_CLKGATE_DIS		REG_BIT(24)
+#define   GAMTLBVDBOX4_CLKGATE_DIS		REG_BIT(23)
+#define   GAMTLBVDBOX7_CLKGATE_DIS		REG_BIT(22)
+#define   GAMTLBVDBOX2_CLKGATE_DIS		REG_BIT(21)
+#define   GAMTLBVDBOX0_CLKGATE_DIS		REG_BIT(17)
+#define   GAMTLBKCR_CLKGATE_DIS			REG_BIT(16)
+#define   GAMTLBGUC_CLKGATE_DIS			REG_BIT(15)
+#define   GAMTLBBLT_CLKGATE_DIS			REG_BIT(14)
+#define   GAMTLBVDBOX1_CLKGATE_DIS		REG_BIT(6)
+
+#define UNSLCGCTL9444				_MMIO(0x9444)
+#define   GAMTLBGFXA0_CLKGATE_DIS		REG_BIT(30)
+#define   GAMTLBGFXA1_CLKGATE_DIS		REG_BIT(29)
+#define   GAMTLBCOMPA0_CLKGATE_DIS		REG_BIT(28)
+#define   GAMTLBCOMPA1_CLKGATE_DIS		REG_BIT(27)
+#define   GAMTLBCOMPB0_CLKGATE_DIS		REG_BIT(26)
+#define   GAMTLBCOMPB1_CLKGATE_DIS		REG_BIT(25)
+#define   GAMTLBCOMPC0_CLKGATE_DIS		REG_BIT(24)
+#define   GAMTLBCOMPC1_CLKGATE_DIS		REG_BIT(23)
+#define   GAMTLBCOMPD0_CLKGATE_DIS		REG_BIT(22)
+#define   GAMTLBCOMPD1_CLKGATE_DIS		REG_BIT(21)
+#define   GAMTLBMERT_CLKGATE_DIS		REG_BIT(20)
+#define   GAMTLBVEBOX3_CLKGATE_DIS		REG_BIT(19)
+#define   GAMTLBVEBOX2_CLKGATE_DIS		REG_BIT(18)
+#define   GAMTLBVEBOX1_CLKGATE_DIS		REG_BIT(17)
+#define   GAMTLBVEBOX0_CLKGATE_DIS		REG_BIT(16)
+#define   LTCDD_CLKGATE_DIS			REG_BIT(10)
+
+#define SLICE_UNIT_LEVEL_CLKGATE		_MMIO(0x94d4)
+#define   SARBUNIT_CLKGATE_DIS			(1 << 5)
+#define   RCCUNIT_CLKGATE_DIS			(1 << 7)
+#define   MSCUNIT_CLKGATE_DIS			(1 << 10)
+#define   NODEDSS_CLKGATE_DIS			REG_BIT(12)
+#define   L3_CLKGATE_DIS			REG_BIT(16)
+#define   L3_CR2X_CLKGATE_DIS			REG_BIT(17)
+
+#define SCCGCTL94DC				_MMIO(0x94dc)
+#define   CG3DDISURB				REG_BIT(14)
+
+#define UNSLICE_UNIT_LEVEL_CLKGATE2		_MMIO(0x94e4)
+#define   VSUNIT_CLKGATE_DIS_TGL		REG_BIT(19)
+#define   PSDUNIT_CLKGATE_DIS			REG_BIT(5)
+
+#define SUBSLICE_UNIT_LEVEL_CLKGATE		_MMIO(0x9524)
+#define   DSS_ROUTER_CLKGATE_DIS		REG_BIT(28)
+#define   GWUNIT_CLKGATE_DIS			REG_BIT(16)
+
+#define SUBSLICE_UNIT_LEVEL_CLKGATE2		_MMIO(0x9528)
+#define   CPSSUNIT_CLKGATE_DIS			REG_BIT(9)
+
+#define SSMCGCTL9530				_MMIO(0x9530)
+#define   RTFUNIT_CLKGATE_DIS			REG_BIT(18)
+
+#define GEN10_DFR_RATIO_EN_AND_CHICKEN		_MMIO(0x9550)
+#define   DFR_DISABLE				(1 << 9)
+
+#define INF_UNIT_LEVEL_CLKGATE			_MMIO(0x9560)
+#define   CGPSF_CLKGATE_DIS			(1 << 3)
+
+#define MICRO_BP0_0				_MMIO(0x9800)
+#define MICRO_BP0_2				_MMIO(0x9804)
+#define MICRO_BP0_1				_MMIO(0x9808)
+#define MICRO_BP1_0				_MMIO(0x980c)
+#define MICRO_BP1_2				_MMIO(0x9810)
+#define MICRO_BP1_1				_MMIO(0x9814)
+#define MICRO_BP2_0				_MMIO(0x9818)
+#define MICRO_BP2_2				_MMIO(0x981c)
+#define MICRO_BP2_1				_MMIO(0x9820)
+#define MICRO_BP3_0				_MMIO(0x9824)
+#define MICRO_BP3_2				_MMIO(0x9828)
+#define MICRO_BP3_1				_MMIO(0x982c)
+#define MICRO_BP_TRIGGER			_MMIO(0x9830)
+#define MICRO_BP3_COUNT_STATUS01		_MMIO(0x9834)
+#define MICRO_BP3_COUNT_STATUS23		_MMIO(0x9838)
+#define MICRO_BP_FIRED_ARMED			_MMIO(0x983c)
+
+#define GEN6_GFXPAUSE				_MMIO(0xa000)
+#define GEN6_RPNSWREQ				_MMIO(0xa008)
 #define   GEN6_TURBO_DISABLE			(1 << 31)
 #define   GEN6_FREQUENCY(x)			((x) << 25)
 #define   HSW_FREQUENCY(x)			((x) << 24)
@@ -1228,8 +729,7 @@ enum {
 #define   GEN9_SW_REQ_UNSLICE_RATIO_SHIFT	23
 #define   GEN9_IGNORE_SLICE_RATIO		(0 << 0)
 
-#define GEN6_RC_VIDEO_FREQ			_MMIO(0xA00C)
-#define GEN6_RC_CONTROL				_MMIO(0xA090)
+#define GEN6_RC_VIDEO_FREQ			_MMIO(0xa00c)
 #define   GEN6_RC_CTL_RC6pp_ENABLE		(1 << 16)
 #define   GEN6_RC_CTL_RC6p_ENABLE		(1 << 17)
 #define   GEN6_RC_CTL_RC6_ENABLE		(1 << 18)
@@ -1239,16 +739,16 @@ enum {
 #define   GEN7_RC_CTL_TO_MODE			(1 << 28)
 #define   GEN6_RC_CTL_EI_MODE(x)		((x) << 27)
 #define   GEN6_RC_CTL_HW_ENABLE			(1 << 31)
-#define GEN6_RP_DOWN_TIMEOUT			_MMIO(0xA010)
-#define GEN6_RP_INTERRUPT_LIMITS		_MMIO(0xA014)
-#define GEN6_RPSTAT1				_MMIO(0xA01C)
+#define GEN6_RP_DOWN_TIMEOUT			_MMIO(0xa010)
+#define GEN6_RP_INTERRUPT_LIMITS		_MMIO(0xa014)
+#define GEN6_RPSTAT1				_MMIO(0xa01c)
 #define   GEN6_CAGF_SHIFT			8
 #define   HSW_CAGF_SHIFT			7
 #define   GEN9_CAGF_SHIFT			23
 #define   GEN6_CAGF_MASK			(0x7f << GEN6_CAGF_SHIFT)
 #define   HSW_CAGF_MASK				(0x7f << HSW_CAGF_SHIFT)
 #define   GEN9_CAGF_MASK			(0x1ff << GEN9_CAGF_SHIFT)
-#define GEN6_RP_CONTROL				_MMIO(0xA024)
+#define GEN6_RP_CONTROL				_MMIO(0xa024)
 #define   GEN6_RP_MEDIA_TURBO			(1 << 11)
 #define   GEN6_RP_MEDIA_MODE_MASK		(3 << 9)
 #define   GEN6_RP_MEDIA_HW_TURBO_MODE		(3 << 9)
@@ -1265,193 +765,295 @@ enum {
 #define   GEN6_RPSWCTL_SHIFT			9
 #define   GEN9_RPSWCTL_ENABLE			(0x2 << GEN6_RPSWCTL_SHIFT)
 #define   GEN9_RPSWCTL_DISABLE			(0x0 << GEN6_RPSWCTL_SHIFT)
-#define GEN6_RP_UP_THRESHOLD			_MMIO(0xA02C)
-#define GEN6_RP_DOWN_THRESHOLD			_MMIO(0xA030)
-#define GEN6_RP_CUR_UP_EI			_MMIO(0xA050)
+#define GEN6_RP_UP_THRESHOLD			_MMIO(0xa02c)
+#define GEN6_RP_DOWN_THRESHOLD			_MMIO(0xa030)
+#define GEN6_RP_CUR_UP_EI			_MMIO(0xa050)
 #define   GEN6_RP_EI_MASK			0xffffff
 #define   GEN6_CURICONT_MASK			GEN6_RP_EI_MASK
-#define GEN6_RP_CUR_UP				_MMIO(0xA054)
+#define GEN6_RP_CUR_UP				_MMIO(0xa054)
 #define   GEN6_CURBSYTAVG_MASK			GEN6_RP_EI_MASK
-#define GEN6_RP_PREV_UP				_MMIO(0xA058)
-#define GEN6_RP_CUR_DOWN_EI			_MMIO(0xA05C)
+#define GEN6_RP_PREV_UP				_MMIO(0xa058)
+#define GEN6_RP_CUR_DOWN_EI			_MMIO(0xa05c)
 #define   GEN6_CURIAVG_MASK			GEN6_RP_EI_MASK
-#define GEN6_RP_CUR_DOWN			_MMIO(0xA060)
-#define GEN6_RP_PREV_DOWN			_MMIO(0xA064)
-#define GEN6_RP_UP_EI				_MMIO(0xA068)
-#define GEN6_RP_DOWN_EI				_MMIO(0xA06C)
-#define GEN6_RP_IDLE_HYSTERSIS			_MMIO(0xA070)
-#define GEN6_RPDEUHWTC				_MMIO(0xA080)
-#define GEN6_RPDEUC				_MMIO(0xA084)
-#define GEN6_RPDEUCSW				_MMIO(0xA088)
-#define GEN6_RC_STATE				_MMIO(0xA094)
+#define GEN6_RP_CUR_DOWN			_MMIO(0xa060)
+#define GEN6_RP_PREV_DOWN			_MMIO(0xa064)
+#define GEN6_RP_UP_EI				_MMIO(0xa068)
+#define GEN6_RP_DOWN_EI				_MMIO(0xa06c)
+#define GEN6_RP_IDLE_HYSTERSIS			_MMIO(0xa070)
+#define GEN6_RPDEUHWTC				_MMIO(0xa080)
+#define GEN6_RPDEUC				_MMIO(0xa084)
+#define GEN6_RPDEUCSW				_MMIO(0xa088)
+#define GEN6_RC_CONTROL				_MMIO(0xa090)
+#define GEN6_RC_STATE				_MMIO(0xa094)
 #define   RC_SW_TARGET_STATE_SHIFT		16
 #define   RC_SW_TARGET_STATE_MASK		(7 << RC_SW_TARGET_STATE_SHIFT)
-#define GEN6_RC1_WAKE_RATE_LIMIT		_MMIO(0xA098)
-#define GEN6_RC6_WAKE_RATE_LIMIT		_MMIO(0xA09C)
-#define GEN6_RC6pp_WAKE_RATE_LIMIT		_MMIO(0xA0A0)
-#define GEN10_MEDIA_WAKE_RATE_LIMIT		_MMIO(0xA0A0)
-#define GEN6_RC_EVALUATION_INTERVAL		_MMIO(0xA0A8)
-#define GEN6_RC_IDLE_HYSTERSIS			_MMIO(0xA0AC)
-#define GEN6_RC_SLEEP				_MMIO(0xA0B0)
-#define GEN6_RCUBMABDTMR			_MMIO(0xA0B0)
-#define GEN6_RC1e_THRESHOLD			_MMIO(0xA0B4)
-#define GEN6_RC6_THRESHOLD			_MMIO(0xA0B8)
-#define GEN6_RC6p_THRESHOLD			_MMIO(0xA0BC)
-#define VLV_RCEDATA				_MMIO(0xA0BC)
-#define GEN6_RC6pp_THRESHOLD			_MMIO(0xA0C0)
-#define GEN6_PMINTRMSK				_MMIO(0xA168)
+#define GEN6_RC1_WAKE_RATE_LIMIT		_MMIO(0xa098)
+#define GEN6_RC6_WAKE_RATE_LIMIT		_MMIO(0xa09c)
+#define GEN6_RC6pp_WAKE_RATE_LIMIT		_MMIO(0xa0a0)
+#define GEN10_MEDIA_WAKE_RATE_LIMIT		_MMIO(0xa0a0)
+#define GEN6_RC_EVALUATION_INTERVAL		_MMIO(0xa0a8)
+#define GEN6_RC_IDLE_HYSTERSIS			_MMIO(0xa0ac)
+#define GEN6_RC_SLEEP				_MMIO(0xa0b0)
+#define GEN6_RCUBMABDTMR			_MMIO(0xa0b0)
+#define GEN6_RC1e_THRESHOLD			_MMIO(0xa0b4)
+#define GEN6_RC6_THRESHOLD			_MMIO(0xa0b8)
+#define GEN6_RC6p_THRESHOLD			_MMIO(0xa0bc)
+#define VLV_RCEDATA				_MMIO(0xa0bc)
+#define GEN6_RC6pp_THRESHOLD			_MMIO(0xa0c0)
+#define GEN9_MEDIA_PG_IDLE_HYSTERESIS		_MMIO(0xa0c4)
+#define GEN9_RENDER_PG_IDLE_HYSTERESIS		_MMIO(0xa0c8)
+
+#define GEN6_PMINTRMSK				_MMIO(0xa168)
 #define   GEN8_PMINTR_DISABLE_REDIRECT_TO_GUC	(1 << 31)
 #define   ARAT_EXPIRED_INTRMSK			(1 << 9)
-#define GEN8_MISC_CTRL0				_MMIO(0xA180)
-#define VLV_PWRDWNUPCTL				_MMIO(0xA294)
-#define GEN9_MEDIA_PG_IDLE_HYSTERESIS		_MMIO(0xA0C4)
-#define GEN9_RENDER_PG_IDLE_HYSTERESIS		_MMIO(0xA0C8)
-#define GEN9_PG_ENABLE				_MMIO(0xA210)
+
+#define GEN8_MISC_CTRL0				_MMIO(0xa180)
+
+#define ECOBUS					_MMIO(0xa180)
+#define    FORCEWAKE_MT_ENABLE			(1 << 5)
+
+#define FORCEWAKE_MT				_MMIO(0xa188) /* multi-threaded */
+#define FORCEWAKE_GT_GEN9			_MMIO(0xa188)
+#define FORCEWAKE				_MMIO(0xa18c)
+
+#define VLV_SPAREG2H				_MMIO(0xa194)
+
+#define GEN9_PG_ENABLE				_MMIO(0xa210)
 #define   GEN9_RENDER_PG_ENABLE			REG_BIT(0)
 #define   GEN9_MEDIA_PG_ENABLE			REG_BIT(1)
 #define   GEN11_MEDIA_SAMPLER_PG_ENABLE		REG_BIT(2)
 #define   VDN_HCP_POWERGATE_ENABLE(n)		REG_BIT(3 + 2 * (n))
 #define   VDN_MFX_POWERGATE_ENABLE(n)		REG_BIT(4 + 2 * (n))
-#define GEN8_PUSHBUS_CONTROL			_MMIO(0xA248)
-#define GEN8_PUSHBUS_ENABLE			_MMIO(0xA250)
-#define GEN8_PUSHBUS_SHIFT			_MMIO(0xA25C)
 
-#define GEN6_PMISR				_MMIO(0x44020)
-#define GEN6_PMIMR				_MMIO(0x44024) /* rps_lock */
-#define GEN6_PMIIR				_MMIO(0x44028)
-#define GEN6_PMIER				_MMIO(0x4402C)
-#define  GEN6_PM_MBOX_EVENT			(1 << 25)
-#define  GEN6_PM_THERMAL_EVENT			(1 << 24)
+#define GEN8_PUSHBUS_CONTROL			_MMIO(0xa248)
+#define GEN8_PUSHBUS_ENABLE			_MMIO(0xa250)
+#define GEN8_PUSHBUS_SHIFT			_MMIO(0xa25c)
 
-/*
- * For Gen11 these are in the upper word of the GPM_WGBOXPERF
- * registers. Shifting is handled on accessing the imr and ier.
- */
-#define  GEN6_PM_RP_DOWN_TIMEOUT		(1 << 6)
-#define  GEN6_PM_RP_UP_THRESHOLD		(1 << 5)
-#define  GEN6_PM_RP_DOWN_THRESHOLD		(1 << 4)
-#define  GEN6_PM_RP_UP_EI_EXPIRED		(1 << 2)
-#define  GEN6_PM_RP_DOWN_EI_EXPIRED		(1 << 1)
-#define  GEN6_PM_RPS_EVENTS			(GEN6_PM_RP_UP_EI_EXPIRED   | \
-						 GEN6_PM_RP_UP_THRESHOLD    | \
-						 GEN6_PM_RP_DOWN_EI_EXPIRED | \
-						 GEN6_PM_RP_DOWN_THRESHOLD  | \
-						 GEN6_PM_RP_DOWN_TIMEOUT)
+/* GPM unit config (Gen9+) */
+#define CTC_MODE				_MMIO(0xa26c)
+#define   CTC_SOURCE_PARAMETER_MASK		1
+#define   CTC_SOURCE_CRYSTAL_CLOCK		0
+#define   CTC_SOURCE_DIVIDE_LOGIC		1
+#define   CTC_SHIFT_PARAMETER_SHIFT		1
+#define   CTC_SHIFT_PARAMETER_MASK		(0x3 << CTC_SHIFT_PARAMETER_SHIFT)
 
-#define GEN7_GT_SCRATCH(i)			_MMIO(0x4F100 + (i) * 4)
-#define GEN7_GT_SCRATCH_REG_NUM			8
+#define FORCEWAKE_MEDIA_GEN9			_MMIO(0xa270)
+#define FORCEWAKE_RENDER_GEN9			_MMIO(0xa278)
 
-#define VLV_GTLC_SURVIVABILITY_REG              _MMIO(0x130098)
-#define VLV_GFX_CLK_STATUS_BIT			(1 << 3)
-#define VLV_GFX_CLK_FORCE_ON_BIT		(1 << 2)
+#define VLV_PWRDWNUPCTL				_MMIO(0xa294)
 
-#define GEN6_GT_GFX_RC6_LOCKED			_MMIO(0x138104)
-#define VLV_COUNTER_CONTROL			_MMIO(0x138104)
-#define   VLV_COUNT_RANGE_HIGH			(1 << 15)
-#define   VLV_MEDIA_RC0_COUNT_EN		(1 << 5)
-#define   VLV_RENDER_RC0_COUNT_EN		(1 << 4)
-#define   VLV_MEDIA_RC6_COUNT_EN		(1 << 1)
-#define   VLV_RENDER_RC6_COUNT_EN		(1 << 0)
-#define GEN6_GT_GFX_RC6				_MMIO(0x138108)
-#define VLV_GT_RENDER_RC6			_MMIO(0x138108)
-#define VLV_GT_MEDIA_RC6			_MMIO(0x13810C)
+#define GEN9_PWRGT_DOMAIN_STATUS		_MMIO(0xa2a0)
+#define   GEN9_PWRGT_MEDIA_STATUS_MASK		(1 << 0)
+#define   GEN9_PWRGT_RENDER_STATUS_MASK		(1 << 1)
 
-#define GEN6_GT_GFX_RC6p			_MMIO(0x13810C)
-#define GEN6_GT_GFX_RC6pp			_MMIO(0x138110)
-#define VLV_RENDER_C0_COUNT			_MMIO(0x138118)
-#define VLV_MEDIA_C0_COUNT			_MMIO(0x13811C)
-
-#define GEN6_GT_CORE_STATUS		_MMIO(0x138060)
-#define   GEN6_CORE_CPD_STATE_MASK	(7 << 4)
-#define   GEN6_RCn_MASK			7
-#define   GEN6_RC0			0
-#define   GEN6_RC3			2
-#define   GEN6_RC6			3
-#define   GEN6_RC7			4
-
-#define GEN8_GT_SLICE_INFO		_MMIO(0x138064)
-#define   GEN8_LSLICESTAT_MASK		0x7
-
-#define CHV_POWER_SS0_SIG1		_MMIO(0xa720)
-#define CHV_POWER_SS1_SIG1		_MMIO(0xa728)
-#define   CHV_SS_PG_ENABLE		(1 << 1)
-#define   CHV_EU08_PG_ENABLE		(1 << 9)
-#define   CHV_EU19_PG_ENABLE		(1 << 17)
-#define   CHV_EU210_PG_ENABLE		(1 << 25)
-
-#define CHV_POWER_SS0_SIG2		_MMIO(0xa724)
-#define CHV_POWER_SS1_SIG2		_MMIO(0xa72c)
-#define   CHV_EU311_PG_ENABLE		(1 << 1)
-
-#define GEN9_SLICE_PGCTL_ACK(slice)	_MMIO(0x804c + (slice) * 0x4)
-#define GEN10_SLICE_PGCTL_ACK(slice)	_MMIO(0x804c + ((slice) / 3) * 0x34 + \
-					      ((slice) % 3) * 0x4)
-#define   GEN9_PGCTL_SLICE_ACK		(1 << 0)
-#define   GEN9_PGCTL_SS_ACK(subslice)	(1 << (2 + (subslice) * 2))
-#define   GEN10_PGCTL_VALID_SS_MASK(slice) ((slice) == 0 ? 0x7F : 0x1F)
-
-#define GEN9_SS01_EU_PGCTL_ACK(slice)	_MMIO(0x805c + (slice) * 0x8)
-#define GEN10_SS01_EU_PGCTL_ACK(slice)	_MMIO(0x805c + ((slice) / 3) * 0x30 + \
-					      ((slice) % 3) * 0x8)
-#define GEN9_SS23_EU_PGCTL_ACK(slice)	_MMIO(0x8060 + (slice) * 0x8)
-#define GEN10_SS23_EU_PGCTL_ACK(slice)	_MMIO(0x8060 + ((slice) / 3) * 0x30 + \
-					      ((slice) % 3) * 0x8)
-#define   GEN9_PGCTL_SSA_EU08_ACK	(1 << 0)
-#define   GEN9_PGCTL_SSA_EU19_ACK	(1 << 2)
-#define   GEN9_PGCTL_SSA_EU210_ACK	(1 << 4)
-#define   GEN9_PGCTL_SSA_EU311_ACK	(1 << 6)
-#define   GEN9_PGCTL_SSB_EU08_ACK	(1 << 8)
-#define   GEN9_PGCTL_SSB_EU19_ACK	(1 << 10)
-#define   GEN9_PGCTL_SSB_EU210_ACK	(1 << 12)
-#define   GEN9_PGCTL_SSB_EU311_ACK	(1 << 14)
+#define MISC_STATUS0				_MMIO(0xa500)
+#define MISC_STATUS1				_MMIO(0xa504)
 
-#define GEN7_MISCCPCTL				_MMIO(0x9424)
-#define   GEN7_DOP_CLOCK_GATE_ENABLE		(1 << 0)
-#define   GEN8_DOP_CLOCK_GATE_CFCLK_ENABLE	(1 << 2)
-#define   GEN8_DOP_CLOCK_GATE_GUC_ENABLE	(1 << 4)
-#define   GEN8_DOP_CLOCK_GATE_MEDIA_ENABLE     (1 << 6)
+#define FORCEWAKE_MEDIA_VDBOX_GEN11(n)		_MMIO(0xa540 + (n) * 4)
+#define FORCEWAKE_MEDIA_VEBOX_GEN11(n)		_MMIO(0xa560 + (n) * 4)
+
+#define CHV_POWER_SS0_SIG1			_MMIO(0xa720)
+#define CHV_POWER_SS0_SIG2			_MMIO(0xa724)
+#define CHV_POWER_SS1_SIG1			_MMIO(0xa728)
+#define   CHV_SS_PG_ENABLE			(1 << 1)
+#define   CHV_EU08_PG_ENABLE			(1 << 9)
+#define   CHV_EU19_PG_ENABLE			(1 << 17)
+#define   CHV_EU210_PG_ENABLE			(1 << 25)
+#define CHV_POWER_SS1_SIG2			_MMIO(0xa72c)
+#define   CHV_EU311_PG_ENABLE			(1 << 1)
 
-#define GEN8_GARBCNTL				_MMIO(0xB004)
+#define GEN7_SARCHKMD				_MMIO(0xb000)
+#define   GEN7_DISABLE_DEMAND_PREFETCH		(1 << 31)
+#define   GEN7_DISABLE_SAMPLER_PREFETCH		(1 << 30)
+
+#define GEN8_GARBCNTL				_MMIO(0xb004)
 #define   GEN9_GAPS_TSV_CREDIT_DISABLE		(1 << 7)
 #define   GEN11_ARBITRATION_PRIO_ORDER_MASK	(0x3f << 22)
 #define   GEN11_HASH_CTRL_EXCL_MASK		(0x7f << 0)
 #define   GEN11_HASH_CTRL_EXCL_BIT0		(1 << 0)
 
-#define GEN11_GLBLINVL				_MMIO(0xB404)
-#define   GEN11_BANK_HASH_ADDR_EXCL_MASK	(0x7f << 5)
-#define   GEN11_BANK_HASH_ADDR_EXCL_BIT0	(1 << 5)
+#define GEN9_SCRATCH_LNCF1			_MMIO(0xb008)
+#define   GEN9_LNCF_NONIA_COHERENT_ATOMICS_ENABLE	REG_BIT(0)
 
-#define GEN10_DFR_RATIO_EN_AND_CHICKEN	_MMIO(0x9550)
-#define   DFR_DISABLE			(1 << 9)
+#define GEN7_L3SQCREG1				_MMIO(0xb010)
+#define   VLV_B0_WA_L3SQCREG1_VALUE		0x00D30000
 
-#define GEN11_GACB_PERF_CTRL			_MMIO(0x4B80)
-#define   GEN11_HASH_CTRL_MASK			(0x3 << 12 | 0xf << 0)
-#define   GEN11_HASH_CTRL_BIT0			(1 << 0)
-#define   GEN11_HASH_CTRL_BIT4			(1 << 12)
+#define GEN7_L3CNTLREG1				_MMIO(0xb01c)
+#define   GEN7_WA_FOR_GEN7_L3_CONTROL		0x3C47FF8C
+#define   GEN7_L3AGDIS				(1 << 19)
+#define GEN7_L3CNTLREG2				_MMIO(0xb020)
+
+/* MOCS (Memory Object Control State) registers */
+#define GEN9_LNCFCMOCS(i)			_MMIO(0xb020 + (i) * 4)	/* L3 Cache Control */
+#define GEN9_LNCFCMOCS_REG_COUNT		32
+
+#define GEN7_L3CNTLREG3				_MMIO(0xb024)
+
+#define GEN7_L3_CHICKEN_MODE_REGISTER		_MMIO(0xb030)
+#define   GEN7_WA_L3_CHICKEN_MODE		0x20000000
+
+#define GEN7_L3SQCREG4				_MMIO(0xb034)
+#define   L3SQ_URB_READ_CAM_MATCH_DISABLE	(1 << 27)
+
+#define HSW_SCRATCH1				_MMIO(0xb038)
+#define   HSW_SCRATCH1_L3_DATA_ATOMICS_DISABLE	(1 << 27)
+
+#define GEN7_L3LOG(slice, i)			_MMIO(0xb070 + (slice) * 0x200 + (i) * 4)
+#define   GEN7_L3LOG_SIZE			0x80
 
-#define GEN11_LSN_UNSLCVC				_MMIO(0xB43C)
+#define GEN10_SCRATCH_LNCF2			_MMIO(0xb0a0)
+#define   PMFLUSHDONE_LNICRSDROP		(1 << 20)
+#define   PMFLUSH_GAPL3UNBLOCK			(1 << 21)
+#define   PMFLUSHDONE_LNEBLK			(1 << 22)
+
+#define XEHP_L3NODEARBCFG			_MMIO(0xb0b4)
+#define   XEHP_LNESPARE				REG_BIT(19)
+
+#define GEN8_L3SQCREG1				_MMIO(0xb100)
+/*
+ * Note that on CHV the following has an off-by-one error wrt. BSpec.
+ * Using the formula in BSpec leads to a hang, while the formula here works
+ * fine and matches the formulas for all other platforms. A BSpec change
+ * request has been filed to clarify this.
+ */
+#define   L3_GENERAL_PRIO_CREDITS(x)		(((x) >> 1) << 19)
+#define   L3_HIGH_PRIO_CREDITS(x)		(((x) >> 1) << 14)
+#define   L3_PRIO_CREDITS_MASK			((0x1f << 19) | (0x1f << 14))
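
Per the comment above, the credit counts are programmed in units of two, which is where the (x) >> 1 in the macros comes from. A standalone sketch of composing the field values (the 30/2 split is an illustrative choice, not a recommendation; the driver applies the result with a read-modify-write under L3_PRIO_CREDITS_MASK):

#include <stdio.h>

/* encodings copied from GEN8_L3SQCREG1 above; credits are in units of two */
#define L3_GENERAL_PRIO_CREDITS(x)	(((x) >> 1) << 19)
#define L3_HIGH_PRIO_CREDITS(x)		(((x) >> 1) << 14)
#define L3_PRIO_CREDITS_MASK		((0x1f << 19) | (0x1f << 14))

int main(void)
{
	/* 30 general-priority + 2 high-priority credits, illustrative only */
	unsigned int val = L3_GENERAL_PRIO_CREDITS(30) | L3_HIGH_PRIO_CREDITS(2);

	printf("credits field %#010x under mask %#010x\n",
	       val, L3_PRIO_CREDITS_MASK);
	return 0;
}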
+
+#define GEN10_L3_CHICKEN_MODE_REGISTER		_MMIO(0xb114)
+#define   GEN11_I2M_WRITE_DISABLE		(1 << 28)
+
+#define GEN8_L3SQCREG4				_MMIO(0xb118)
+#define   GEN11_LQSC_CLEAN_EVICT_DISABLE	(1 << 6)
+#define   GEN8_LQSC_RO_PERF_DIS			(1 << 27)
+#define   GEN8_LQSC_FLUSH_COHERENT_LINES	(1 << 21)
+#define   GEN8_LQSQ_NONIA_COHERENT_ATOMICS_ENABLE	REG_BIT(22)
+
+#define GEN9_SCRATCH1				_MMIO(0xb11c)
+#define   EVICTION_PERF_FIX_ENABLE		REG_BIT(8)
+
+#define BDW_SCRATCH1				_MMIO(0xb11c)
+#define   GEN9_LBS_SLA_RETRY_TIMER_DECREMENT_ENABLE	(1 << 2)
+
+#define GEN11_SCRATCH2				_MMIO(0xb140)
+#define   GEN11_COHERENT_PARTIAL_WRITE_MERGE_ENABLE	(1 << 19)
+
+#define GEN11_L3SQCREG5				_MMIO(0xb158)
+#define   L3_PWM_TIMER_INIT_VAL_MASK		REG_GENMASK(9, 0)
+
+#define MLTICTXCTL				_MMIO(0xb170)
+#define   TDONRENDER				REG_BIT(2)
+
+#define XEHP_L3SCQREG7				_MMIO(0xb188)
+#define   BLEND_FILL_CACHING_OPT_DIS		REG_BIT(3)
+
+#define L3SQCREG1_CCS0				_MMIO(0xb200)
+#define   FLUSHALLNONCOH			REG_BIT(5)
+
+#define GEN11_GLBLINVL				_MMIO(0xb404)
+#define   GEN11_BANK_HASH_ADDR_EXCL_MASK	(0x7f << 5)
+#define   GEN11_BANK_HASH_ADDR_EXCL_BIT0	(1 << 5)
+
+#define GEN11_LSN_UNSLCVC			_MMIO(0xb43c)
 #define   GEN11_LSN_UNSLCVC_GAFS_HALF_CL2_MAXALLOC	(1 << 9)
 #define   GEN11_LSN_UNSLCVC_GAFS_HALF_SF_MAXALLOC	(1 << 7)
 
-#define GEN10_SAMPLER_MODE		_MMIO(0xE18C)
-#define   ENABLE_SMALLPL			REG_BIT(15)
-#define   GEN11_SAMPLER_ENABLE_HEADLESS_MSG	REG_BIT(5)
+#define __GEN9_RCS0_MOCS0			0xc800
+#define GEN9_GFX_MOCS(i)			_MMIO(__GEN9_RCS0_MOCS0 + (i) * 4)
+#define __GEN9_VCS0_MOCS0			0xc900
+#define GEN9_MFX0_MOCS(i)			_MMIO(__GEN9_VCS0_MOCS0 + (i) * 4)
+#define __GEN9_VCS1_MOCS0			0xca00
+#define GEN9_MFX1_MOCS(i)			_MMIO(__GEN9_VCS1_MOCS0 + (i) * 4)
+#define __GEN9_VECS0_MOCS0			0xcb00
+#define GEN9_VEBOX_MOCS(i)			_MMIO(__GEN9_VECS0_MOCS0 + (i) * 4)
+#define __GEN9_BCS0_MOCS0			0xcc00
+#define GEN9_BLT_MOCS(i)			_MMIO(__GEN9_BCS0_MOCS0 + (i) * 4)
+
+#define GEN12_FAULT_TLB_DATA0			_MMIO(0xceb8)
+#define GEN12_FAULT_TLB_DATA1			_MMIO(0xcebc)
+#define   FAULT_VA_HIGH_BITS			(0xf << 0)
+#define   FAULT_GTT_SEL				(1 << 4)
+
+#define GEN12_RING_FAULT_REG			_MMIO(0xcec4)
+#define   GEN8_RING_FAULT_ENGINE_ID(x)		(((x) >> 12) & 0x7)
+#define   RING_FAULT_GTTSEL_MASK		(1 << 11)
+#define   RING_FAULT_SRCID(x)			(((x) >> 3) & 0xff)
+#define   RING_FAULT_FAULT_TYPE(x)		(((x) >> 1) & 0x3)
+#define   RING_FAULT_VALID			(1 << 0)
+
+#define GEN12_GFX_TLB_INV_CR			_MMIO(0xced8)
+#define GEN12_VD_TLB_INV_CR			_MMIO(0xcedc)
+#define GEN12_VE_TLB_INV_CR			_MMIO(0xcee0)
+#define GEN12_BLT_TLB_INV_CR			_MMIO(0xcee4)
+
+#define GEN12_MERT_MOD_CTRL			_MMIO(0xcf28)
+#define RENDER_MOD_CTRL				_MMIO(0xcf2c)
+#define COMP_MOD_CTRL				_MMIO(0xcf30)
+#define VDBX_MOD_CTRL				_MMIO(0xcf34)
+#define VEBX_MOD_CTRL				_MMIO(0xcf38)
+#define   FORCE_MISS_FTLB			REG_BIT(3)
+
+#define GEN12_GAMSTLB_CTRL			_MMIO(0xcf4c)
+#define   CONTROL_BLOCK_CLKGATE_DIS		REG_BIT(12)
+#define   EGRESS_BLOCK_CLKGATE_DIS		REG_BIT(11)
+#define   TAG_BLOCK_CLKGATE_DIS			REG_BIT(7)
+
+#define GEN12_GAMCNTRL_CTRL			_MMIO(0xcf54)
+#define   INVALIDATION_BROADCAST_MODE_DIS	REG_BIT(12)
+#define   GLOBAL_INVALIDATION_MODE		REG_BIT(2)
 
-#define GEN7_L3LOG(slice, i)		_MMIO(0xB070 + (slice) * 0x200 + (i) * 4)
-#define GEN7_L3LOG_SIZE			0x80
+#define GEN12_GAM_DONE				_MMIO(0xcf68)
 
-#define GEN7_HALF_SLICE_CHICKEN1	_MMIO(0xe100) /* IVB GT1 + VLV */
-#define GEN7_HALF_SLICE_CHICKEN1_GT2	_MMIO(0xf100)
+#define GEN7_HALF_SLICE_CHICKEN1		_MMIO(0xe100) /* IVB GT1 + VLV */
 #define   GEN7_MAX_PS_THREAD_DEP		(8 << 12)
 #define   GEN7_SINGLE_SUBSCAN_DISPATCH_ENABLE	(1 << 10)
 #define   GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE	(1 << 4)
 #define   GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE	(1 << 3)
 
-#define GEN9_HALF_SLICE_CHICKEN5	_MMIO(0xe188)
-#define   GEN9_DG_MIRROR_FIX_ENABLE	(1 << 5)
-#define   GEN9_CCS_TLB_PREFETCH_ENABLE	(1 << 3)
+#define GEN7_SAMPLER_INSTDONE			_MMIO(0xe160)
+#define GEN7_ROW_INSTDONE			_MMIO(0xe164)
+
+#define HALF_SLICE_CHICKEN2			_MMIO(0xe180)
+#define   GEN8_ST_PO_DISABLE			(1 << 13)
+
+#define HALF_SLICE_CHICKEN3			_MMIO(0xe184)
+#define   HSW_SAMPLE_C_PERFORMANCE		(1 << 9)
+#define   GEN8_CENTROID_PIXEL_OPT_DIS		(1 << 8)
+#define   GEN9_DISABLE_OCL_OOB_SUPPRESS_LOGIC	(1 << 5)
+#define   GEN8_SAMPLER_POWER_BYPASS_DIS		(1 << 1)
+
+#define GEN9_HALF_SLICE_CHICKEN5		_MMIO(0xe188)
+#define   GEN9_DG_MIRROR_FIX_ENABLE		(1 << 5)
+#define   GEN9_CCS_TLB_PREFETCH_ENABLE		(1 << 3)
+
+#define GEN10_SAMPLER_MODE			_MMIO(0xe18c)
+#define   ENABLE_SMALLPL			REG_BIT(15)
+#define   GEN11_SAMPLER_ENABLE_HEADLESS_MSG	REG_BIT(5)
+
+#define GEN9_HALF_SLICE_CHICKEN7		_MMIO(0xe194)
+#define   DG2_DISABLE_ROUND_ENABLE_ALLOW_FOR_SSLA	REG_BIT(15)
+#define   GEN9_SAMPLER_HASH_COMPRESSED_READ_ADDR	REG_BIT(8)
+#define   GEN9_ENABLE_YV12_BUGFIX		REG_BIT(4)
+#define   GEN9_ENABLE_GPGPU_PREEMPTION		REG_BIT(2)
+
+#define GEN10_CACHE_MODE_SS			_MMIO(0xe420)
+#define   ENABLE_PREFETCH_INTO_IC		REG_BIT(3)
+#define   FLOAT_BLEND_OPTIMIZATION_ENABLE	REG_BIT(4)
+
+#define EU_PERF_CNTL0				_MMIO(0xe458)
+#define EU_PERF_CNTL4				_MMIO(0xe45c)
+
+#define GEN9_ROW_CHICKEN4			_MMIO(0xe48c)
+#define   GEN12_DISABLE_GRF_CLEAR		REG_BIT(13)
+#define   XEHP_DIS_BBL_SYSPIPE			REG_BIT(11)
+#define   GEN12_DISABLE_TDL_PUSH		REG_BIT(9)
+#define   GEN11_DIS_PICK_2ND_EU			REG_BIT(7)
+#define   GEN12_DISABLE_HDR_PAST_PAYLOAD_HOLD_FIX	REG_BIT(4)
+
+#define HSW_ROW_CHICKEN3			_MMIO(0xe49c)
+#define   HSW_ROW_CHICKEN3_L3_GLOBAL_ATOMICS_DISABLE	(1 << 6)
 
-#define GEN8_ROW_CHICKEN		_MMIO(0xe4f0)
+#define GEN8_ROW_CHICKEN			_MMIO(0xe4f0)
 #define   FLOW_CONTROL_ENABLE			REG_BIT(15)
 #define   UGM_BACKUP_MODE			REG_BIT(13)
 #define   MDQ_ARBITRATION_MODE			REG_BIT(12)
@@ -1466,6 +1068,20 @@ enum {
 #define   GEN12_ENABLE_LARGE_GRF_MODE		REG_BIT(12)
 #define   GEN12_PUSH_CONST_DEREF_HOLD_DIS	REG_BIT(8)
 
+#define RT_CTRL					_MMIO(0xe530)
+#define   DIS_NULL_QUERY			REG_BIT(10)
+
+#define EU_PERF_CNTL1				_MMIO(0xe558)
+#define EU_PERF_CNTL5				_MMIO(0xe55c)
+
+#define GEN12_HDC_CHICKEN0			_MMIO(0xe5f0)
+#define   LSC_L1_FLUSH_CTL_3D_DATAPORT_FLUSH_EVENTS_MASK	REG_GENMASK(13, 11)
+#define ICL_HDC_MODE				_MMIO(0xe5f4)
+
+#define EU_PERF_CNTL2				_MMIO(0xe658)
+#define EU_PERF_CNTL6				_MMIO(0xe65c)
+#define EU_PERF_CNTL3				_MMIO(0xe758)
+
 #define LSC_CHICKEN_BIT_0			_MMIO(0xe7c8)
 #define   FORCE_1_SUB_MESSAGE_PER_FRAGMENT	REG_BIT(15)
 #define LSC_CHICKEN_BIT_0_UDW			_MMIO(0xe7c8 + 4)
@@ -1475,80 +1091,436 @@ enum {
 #define   MAXREQS_PER_BANK			REG_GENMASK(39 - 32, 37 - 32)
 #define   DISABLE_128B_EVICTION_COMMAND_UDW	REG_BIT(36 - 32)
 
-#define GEN7_ROW_CHICKEN2_GT2		_MMIO(0xf4f4)
-#define   DOP_CLOCK_GATING_DISABLE	(1 << 0)
-#define   PUSH_CONSTANT_DEREF_DISABLE	(1 << 8)
-#define   GEN11_TDL_CLOCK_GATING_FIX_DISABLE	(1 << 1)
+#define SARB_CHICKEN1				_MMIO(0xe90c)
+#define   COMP_CKN_IN				REG_GENMASK(30, 29)
 
-#define GEN9_ROW_CHICKEN4				_MMIO(0xe48c)
-#define   GEN12_DISABLE_GRF_CLEAR			REG_BIT(13)
-#define   GEN12_DISABLE_TDL_PUSH			REG_BIT(9)
-#define   GEN11_DIS_PICK_2ND_EU				REG_BIT(7)
-#define   GEN12_DISABLE_HDR_PAST_PAYLOAD_HOLD_FIX	REG_BIT(4)
+#define GEN7_HALF_SLICE_CHICKEN1_GT2		_MMIO(0xf100)
 
-#define HSW_ROW_CHICKEN3		_MMIO(0xe49c)
-#define  HSW_ROW_CHICKEN3_L3_GLOBAL_ATOMICS_DISABLE    (1 << 6)
+#define GEN7_ROW_CHICKEN2_GT2			_MMIO(0xf4f4)
+#define   DOP_CLOCK_GATING_DISABLE		(1 << 0)
+#define   PUSH_CONSTANT_DEREF_DISABLE		(1 << 8)
+#define   GEN11_TDL_CLOCK_GATING_FIX_DISABLE	(1 << 1)
 
-#define HALF_SLICE_CHICKEN2		_MMIO(0xe180)
-#define   GEN8_ST_PO_DISABLE		(1 << 13)
+#define __GEN11_VCS2_MOCS0			0x10000
+#define GEN11_MFX2_MOCS(i)			_MMIO(__GEN11_VCS2_MOCS0 + (i) * 4)
+
+#define CRSTANDVID				_MMIO(0x11100)
+#define PXVFREQ(fstart)				_MMIO(0x11110 + (fstart) * 4)  /* P[0-15]VIDFREQ (0x1114c) (Ironlake) */
+#define   PXVFREQ_PX_MASK			0x7f000000
+#define   PXVFREQ_PX_SHIFT			24
+#define VIDFREQ_BASE				_MMIO(0x11110)
+#define VIDFREQ1				_MMIO(0x11110) /* VIDFREQ1-4 (0x1111c) (Cantiga) */
+#define VIDFREQ2				_MMIO(0x11114)
+#define VIDFREQ3				_MMIO(0x11118)
+#define VIDFREQ4				_MMIO(0x1111c)
+#define   VIDFREQ_P0_MASK			0x1f000000
+#define   VIDFREQ_P0_SHIFT			24
+#define   VIDFREQ_P0_CSCLK_MASK			0x00f00000
+#define   VIDFREQ_P0_CSCLK_SHIFT		20
+#define   VIDFREQ_P0_CRCLK_MASK			0x000f0000
+#define   VIDFREQ_P0_CRCLK_SHIFT		16
+#define   VIDFREQ_P1_MASK			0x00001f00
+#define   VIDFREQ_P1_SHIFT			8
+#define   VIDFREQ_P1_CSCLK_MASK			0x000000f0
+#define   VIDFREQ_P1_CSCLK_SHIFT		4
+#define   VIDFREQ_P1_CRCLK_MASK			0x0000000f
+#define INTTOEXT_BASE				_MMIO(0x11120) /* INTTOEXT1-8 (0x1113c) */
+#define   INTTOEXT_MAP3_SHIFT			24
+#define   INTTOEXT_MAP3_MASK			(0x1f << INTTOEXT_MAP3_SHIFT)
+#define   INTTOEXT_MAP2_SHIFT			16
+#define   INTTOEXT_MAP2_MASK			(0x1f << INTTOEXT_MAP2_SHIFT)
+#define   INTTOEXT_MAP1_SHIFT			8
+#define   INTTOEXT_MAP1_MASK			(0x1f << INTTOEXT_MAP1_SHIFT)
+#define   INTTOEXT_MAP0_SHIFT			0
+#define   INTTOEXT_MAP0_MASK			(0x1f << INTTOEXT_MAP0_SHIFT)
+#define MEMSWCTL				_MMIO(0x11170) /* Ironlake only */
+#define   MEMCTL_CMD_MASK			0xe000
+#define   MEMCTL_CMD_SHIFT			13
+#define   MEMCTL_CMD_RCLK_OFF			0
+#define   MEMCTL_CMD_RCLK_ON			1
+#define   MEMCTL_CMD_CHFREQ			2
+#define   MEMCTL_CMD_CHVID			3
+#define   MEMCTL_CMD_VMMOFF			4
+#define   MEMCTL_CMD_VMMON			5
+#define   MEMCTL_CMD_STS			(1 << 12) /* write 1 triggers command, clears
+							     when command complete */
+#define   MEMCTL_FREQ_MASK			0x0f00 /* jitter, from 0-15 */
+#define   MEMCTL_FREQ_SHIFT			8
+#define   MEMCTL_SFCAVM				(1 << 7)
+#define   MEMCTL_TGT_VID_MASK			0x007f
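
MEMSWCTL is a command-style register: software programs a command and its operand fields, sets MEMCTL_CMD_STS to trigger, and the hardware clears STS when the command completes. A standalone sketch of a CHFREQ (frequency change) request against a fake register variable (real code goes through the uncore MMIO helpers and polls with a timeout):

#include <stdio.h>
#include <stdint.h>

/* encodings copied from MEMSWCTL above */
#define MEMCTL_CMD_SHIFT	13
#define MEMCTL_CMD_CHFREQ	2
#define MEMCTL_CMD_STS		(1u << 12)
#define MEMCTL_FREQ_SHIFT	8

static uint32_t fake_memswctl;	/* stands in for the MMIO register */

int main(void)
{
	uint32_t jitter = 7;	/* target frequency point, 0-15 */

	/* program command + jitter fields, then set STS to trigger */
	fake_memswctl = (MEMCTL_CMD_CHFREQ << MEMCTL_CMD_SHIFT) |
			(jitter << MEMCTL_FREQ_SHIFT) | MEMCTL_CMD_STS;

	/* hardware clears STS on completion; emulate that here */
	fake_memswctl &= ~MEMCTL_CMD_STS;

	printf("MEMSWCTL now %#06x (STS clear => command done)\n",
	       fake_memswctl);
	return 0;
}
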
+#define MEMIHYST				_MMIO(0x1117c)
+#define MEMINTREN				_MMIO(0x11180) /* 16 bits */
+#define   MEMINT_RSEXIT_EN			(1 << 8)
+#define   MEMINT_CX_SUPR_EN			(1 << 7)
+#define   MEMINT_CONT_BUSY_EN			(1 << 6)
+#define   MEMINT_AVG_BUSY_EN			(1 << 5)
+#define   MEMINT_EVAL_CHG_EN			(1 << 4)
+#define   MEMINT_MON_IDLE_EN			(1 << 3)
+#define   MEMINT_UP_EVAL_EN			(1 << 2)
+#define   MEMINT_DOWN_EVAL_EN			(1 << 1)
+#define   MEMINT_SW_CMD_EN			(1 << 0)
+#define MEMINTRSTR				_MMIO(0x11182) /* 16 bits */
+#define   MEM_RSEXIT_MASK			0xc000
+#define   MEM_RSEXIT_SHIFT			14
+#define   MEM_CONT_BUSY_MASK			0x3000
+#define   MEM_CONT_BUSY_SHIFT			12
+#define   MEM_AVG_BUSY_MASK			0x0c00
+#define   MEM_AVG_BUSY_SHIFT			10
+#define   MEM_EVAL_CHG_MASK			0x0300
+#define   MEM_EVAL_BUSY_SHIFT			8
+#define   MEM_MON_IDLE_MASK			0x00c0
+#define   MEM_MON_IDLE_SHIFT			6
+#define   MEM_UP_EVAL_MASK			0x0030
+#define   MEM_UP_EVAL_SHIFT			4
+#define   MEM_DOWN_EVAL_MASK			0x000c
+#define   MEM_DOWN_EVAL_SHIFT			2
+#define   MEM_SW_CMD_MASK			0x0003
+#define   MEM_INT_STEER_GFX			0
+#define   MEM_INT_STEER_CMR			1
+#define   MEM_INT_STEER_SMI			2
+#define   MEM_INT_STEER_SCI			3
+#define MEMINTRSTS				_MMIO(0x11184)
+#define   MEMINT_RSEXIT				(1 << 7)
+#define   MEMINT_CONT_BUSY			(1 << 6)
+#define   MEMINT_AVG_BUSY			(1 << 5)
+#define   MEMINT_EVAL_CHG			(1 << 4)
+#define   MEMINT_MON_IDLE			(1 << 3)
+#define   MEMINT_UP_EVAL			(1 << 2)
+#define   MEMINT_DOWN_EVAL			(1 << 1)
+#define   MEMINT_SW_CMD				(1 << 0)
+#define MEMMODECTL				_MMIO(0x11190)
+#define   MEMMODE_BOOST_EN			(1 << 31)
+#define   MEMMODE_BOOST_FREQ_MASK		0x0f000000 /* jitter for boost, 0-15 */
+#define   MEMMODE_BOOST_FREQ_SHIFT		24
+#define   MEMMODE_IDLE_MODE_MASK		0x00030000
+#define   MEMMODE_IDLE_MODE_SHIFT		16
+#define   MEMMODE_IDLE_MODE_EVAL		0
+#define   MEMMODE_IDLE_MODE_CONT		1
+#define   MEMMODE_HWIDLE_EN			(1 << 15)
+#define   MEMMODE_SWMODE_EN			(1 << 14)
+#define   MEMMODE_RCLK_GATE			(1 << 13)
+#define   MEMMODE_HW_UPDATE			(1 << 12)
+#define   MEMMODE_FSTART_MASK			0x00000f00 /* starting jitter, 0-15 */
+#define   MEMMODE_FSTART_SHIFT			8
+#define   MEMMODE_FMAX_MASK			0x000000f0 /* max jitter, 0-15 */
+#define   MEMMODE_FMAX_SHIFT			4
+#define   MEMMODE_FMIN_MASK			0x0000000f /* min jitter, 0-15 */
+#define RCBMAXAVG				_MMIO(0x1119c)
+#define MEMSWCTL2				_MMIO(0x1119e) /* Cantiga only */
+#define   SWMEMCMD_RENDER_OFF			(0 << 13)
+#define   SWMEMCMD_RENDER_ON			(1 << 13)
+#define   SWMEMCMD_SWFREQ			(2 << 13)
+#define   SWMEMCMD_TARVID			(3 << 13)
+#define   SWMEMCMD_VRM_OFF			(4 << 13)
+#define   SWMEMCMD_VRM_ON			(5 << 13)
+#define   CMDSTS				(1 << 12)
+#define   SFCAVM				(1 << 11)
+#define   SWFREQ_MASK				0x0380 /* P0-7 */
+#define   SWFREQ_SHIFT				7
+#define   TARVID_MASK				0x001f
+#define MEMSTAT_CTG				_MMIO(0x111a0)
+#define RCBMINAVG				_MMIO(0x111a0)
+#define RCUPEI					_MMIO(0x111b0)
+#define RCDNEI					_MMIO(0x111b4)
+#define RSTDBYCTL				_MMIO(0x111b8)
+#define   RS1EN					(1 << 31)
+#define   RS2EN					(1 << 30)
+#define   RS3EN					(1 << 29)
+#define   D3RS3EN				(1 << 28) /* Display D3 implies RS3 */
+#define   SWPROMORSX				(1 << 27) /* RSx promotion timers ignored */
+#define   RCWAKERW				(1 << 26) /* Resetwarn from PCH causes wakeup */
+#define   DPRSLPVREN				(1 << 25) /* Fast voltage ramp enable */
+#define   GFXTGHYST				(1 << 24) /* Hysteresis to allow trunk gating */
+#define   RCX_SW_EXIT				(1 << 23) /* Leave RSx and prevent re-entry */
+#define   RSX_STATUS_MASK			(7 << 20)
+#define   RSX_STATUS_ON				(0 << 20)
+#define   RSX_STATUS_RC1			(1 << 20)
+#define   RSX_STATUS_RC1E			(2 << 20)
+#define   RSX_STATUS_RS1			(3 << 20)
+#define   RSX_STATUS_RS2			(4 << 20) /* aka rc6 */
+#define   RSX_STATUS_RSVD			(5 << 20) /* deep rc6 unsupported on ilk */
+#define   RSX_STATUS_RS3			(6 << 20) /* rs3 unsupported on ilk */
+#define   RSX_STATUS_RSVD2			(7 << 20)
+#define   UWRCRSXE				(1 << 19) /* wake counter limit prevents rsx */
+#define   RSCRP					(1 << 18) /* rs requests control on rs1/2 reqs */
+#define   JRSC					(1 << 17) /* rsx coupled to cpu c-state */
+#define   RS2INC0				(1 << 16) /* allow rs2 in cpu c0 */
+#define   RS1CONTSAV_MASK			(3 << 14)
+#define   RS1CONTSAV_NO_RS1			(0 << 14) /* rs1 doesn't save/restore context */
+#define   RS1CONTSAV_RSVD			(1 << 14)
+#define   RS1CONTSAV_SAVE_RS1			(2 << 14) /* rs1 saves context */
+#define   RS1CONTSAV_FULL_RS1			(3 << 14) /* rs1 saves and restores context */
+#define   NORMSLEXLAT_MASK			(3 << 12)
+#define   SLOW_RS123				(0 << 12)
+#define   SLOW_RS23				(1 << 12)
+#define   SLOW_RS3				(2 << 12)
+#define   NORMAL_RS123				(3 << 12)
+#define   RCMODE_TIMEOUT			(1 << 11) /* 0 is eval interval method */
+#define   IMPROMOEN				(1 << 10) /* promo is immediate or delayed until next idle interval (only for timeout method above) */
+#define   RCENTSYNC				(1 << 9) /* rs coupled to cpu c-state (3/6/7) */
+#define   STATELOCK				(1 << 7) /* locked to rs_cstate if 0 */
+#define   RS_CSTATE_MASK			(3 << 4)
+#define   RS_CSTATE_C367_RS1			(0 << 4)
+#define   RS_CSTATE_C36_RS1_C7_RS2		(1 << 4)
+#define   RS_CSTATE_RSVD			(2 << 4)
+#define   RS_CSTATE_C367_RS2			(3 << 4)
+#define   REDSAVES				(1 << 3) /* no context save if it was idle during rs0 */
+#define   REDRESTORES				(1 << 2) /* no restore if it was idle during rs0 */
+#define VIDCTL					_MMIO(0x111c0)
+#define VIDSTS					_MMIO(0x111c8)
+#define VIDSTART				_MMIO(0x111cc) /* 8 bits */
+#define MEMSTAT_ILK				_MMIO(0x111f8)
+#define   MEMSTAT_VID_MASK			0x7f00
+#define   MEMSTAT_VID_SHIFT			8
+#define   MEMSTAT_PSTATE_MASK			0x00f8
+#define   MEMSTAT_PSTATE_SHIFT			3
+#define   MEMSTAT_MON_ACTV			(1 << 2)
+#define   MEMSTAT_SRC_CTL_MASK			0x0003
+#define   MEMSTAT_SRC_CTL_CORE			0
+#define   MEMSTAT_SRC_CTL_TRB			1
+#define   MEMSTAT_SRC_CTL_THM			2
+#define   MEMSTAT_SRC_CTL_STDBY			3
+#define PMMISC					_MMIO(0x11214)
+#define   MCPPCE_EN				(1 << 0) /* enable PM_MSG from PCH->MPC */
+#define SDEW					_MMIO(0x1124c)
+#define CSIEW0					_MMIO(0x11250)
+#define CSIEW1					_MMIO(0x11254)
+#define CSIEW2					_MMIO(0x11258)
+#define PEW(i)					_MMIO(0x1125c + (i) * 4) /* 5 registers */
+#define DEW(i)					_MMIO(0x11270 + (i) * 4) /* 3 registers */
+#define MCHAFE					_MMIO(0x112c0)
+#define CSIEC					_MMIO(0x112e0)
+#define DMIEC					_MMIO(0x112e4)
+#define DDREC					_MMIO(0x112e8)
+#define PEG0EC					_MMIO(0x112ec)
+#define PEG1EC					_MMIO(0x112f0)
+#define GFXEC					_MMIO(0x112f4)
+#define INTTOEXT_BASE_ILK			_MMIO(0x11300)
+#define RPPREVBSYTUPAVG				_MMIO(0x113b8)
+#define RCPREVBSYTUPAVG				_MMIO(0x113b8)
+#define RCPREVBSYTDNAVG				_MMIO(0x113bc)
+#define RPPREVBSYTDNAVG				_MMIO(0x113bc)
+#define ECR					_MMIO(0x11600)
+#define   ECR_GPFE				(1 << 31)
+#define   ECR_IMONE				(1 << 30)
+#define   ECR_CAP_MASK				0x0000001f /* Event range, 0-31 */
+#define OGW0					_MMIO(0x11608)
+#define OGW1					_MMIO(0x1160c)
+#define EG0					_MMIO(0x11610)
+#define EG1					_MMIO(0x11614)
+#define EG2					_MMIO(0x11618)
+#define EG3					_MMIO(0x1161c)
+#define EG4					_MMIO(0x11620)
+#define EG5					_MMIO(0x11624)
+#define EG6					_MMIO(0x11628)
+#define EG7					_MMIO(0x1162c)
+#define PXW(i)					_MMIO(0x11664 + (i) * 4) /* 4 registers */
+#define PXWL(i)					_MMIO(0x11680 + (i) * 8) /* 8 registers */
+#define LCFUSE02				_MMIO(0x116c0)
+#define   LCFUSE_HIV_MASK			0x000000ff
+
+#define GAC_ECO_BITS				_MMIO(0x14090)
+#define   ECOBITS_SNB_BIT			(1 << 13)
+#define   ECOBITS_PPGTT_CACHE64B		(3 << 8)
+#define   ECOBITS_PPGTT_CACHE4B			(0 << 8)
+
+#define CHV_FUSE_GT				_MMIO(VLV_DISPLAY_BASE + 0x2168)
+#define   CHV_FGT_DISABLE_SS0			(1 << 10)
+#define   CHV_FGT_DISABLE_SS1			(1 << 11)
+#define   CHV_FGT_EU_DIS_SS0_R0_SHIFT		16
+#define   CHV_FGT_EU_DIS_SS0_R0_MASK		(0xf << CHV_FGT_EU_DIS_SS0_R0_SHIFT)
+#define   CHV_FGT_EU_DIS_SS0_R1_SHIFT		20
+#define   CHV_FGT_EU_DIS_SS0_R1_MASK		(0xf << CHV_FGT_EU_DIS_SS0_R1_SHIFT)
+#define   CHV_FGT_EU_DIS_SS1_R0_SHIFT		24
+#define   CHV_FGT_EU_DIS_SS1_R0_MASK		(0xf << CHV_FGT_EU_DIS_SS1_R0_SHIFT)
+#define   CHV_FGT_EU_DIS_SS1_R1_SHIFT		28
+#define   CHV_FGT_EU_DIS_SS1_R1_MASK		(0xf << CHV_FGT_EU_DIS_SS1_R1_SHIFT)
+
+#define BCS_SWCTRL				_MMIO(0x22200)
+#define   BCS_SRC_Y				REG_BIT(0)
+#define   BCS_DST_Y				REG_BIT(1)
+
+#define GAB_CTL					_MMIO(0x24000)
+#define   GAB_CTL_CONT_AFTER_PAGEFAULT		(1 << 8)
 
-#define HALF_SLICE_CHICKEN3		_MMIO(0xe184)
-#define   HSW_SAMPLE_C_PERFORMANCE	(1 << 9)
-#define   GEN8_CENTROID_PIXEL_OPT_DIS	(1 << 8)
-#define   GEN9_DISABLE_OCL_OOB_SUPPRESS_LOGIC	(1 << 5)
-#define   GEN8_SAMPLER_POWER_BYPASS_DIS	(1 << 1)
+#define GEN6_PMISR				_MMIO(0x44020)
+#define GEN6_PMIMR				_MMIO(0x44024) /* rps_lock */
+#define GEN6_PMIIR				_MMIO(0x44028)
+#define GEN6_PMIER				_MMIO(0x4402c)
+#define   GEN6_PM_MBOX_EVENT			(1 << 25)
+#define   GEN6_PM_THERMAL_EVENT			(1 << 24)
+/*
+ * For Gen11 these are in the upper word of the GPM_WGBOXPERF
+ * registers. Shifting is handled when accessing the IMR and IER.
+ */
+#define   GEN6_PM_RP_DOWN_TIMEOUT		(1 << 6)
+#define   GEN6_PM_RP_UP_THRESHOLD		(1 << 5)
+#define   GEN6_PM_RP_DOWN_THRESHOLD		(1 << 4)
+#define   GEN6_PM_RP_UP_EI_EXPIRED		(1 << 2)
+#define   GEN6_PM_RP_DOWN_EI_EXPIRED		(1 << 1)
+#define   GEN6_PM_RPS_EVENTS			(GEN6_PM_RP_UP_EI_EXPIRED   | \
+						 GEN6_PM_RP_UP_THRESHOLD    | \
+						 GEN6_PM_RP_DOWN_EI_EXPIRED | \
+						 GEN6_PM_RP_DOWN_THRESHOLD  | \
+						 GEN6_PM_RP_DOWN_TIMEOUT)
 
-#define GEN9_HALF_SLICE_CHICKEN7	_MMIO(0xe194)
-#define   DG2_DISABLE_ROUND_ENABLE_ALLOW_FOR_SSLA	REG_BIT(15)
-#define   GEN9_SAMPLER_HASH_COMPRESSED_READ_ADDR	REG_BIT(8)
-#define   GEN9_ENABLE_YV12_BUGFIX			REG_BIT(4)
-#define   GEN9_ENABLE_GPGPU_PREEMPTION			REG_BIT(2)
+#define GEN7_GT_SCRATCH(i)			_MMIO(0x4f100 + (i) * 4)
+#define   GEN7_GT_SCRATCH_REG_NUM		8
+
+#define GFX_FLSH_CNTL_GEN6			_MMIO(0x101008)
+#define   GFX_FLSH_CNTL_EN			(1 << 0)
+
+#define GTFIFODBG				_MMIO(0x120000)
+#define   GT_FIFO_SBDEDICATE_FREE_ENTRY_CHV	(0x1f << 20)
+#define   GT_FIFO_FREE_ENTRIES_CHV		(0x7f << 13)
+#define   GT_FIFO_SBDROPERR			(1 << 6)
+#define   GT_FIFO_BLOBDROPERR			(1 << 5)
+#define   GT_FIFO_SB_READ_ABORTERR		(1 << 4)
+#define   GT_FIFO_DROPERR			(1 << 3)
+#define   GT_FIFO_OVFERR			(1 << 2)
+#define   GT_FIFO_IAWRERR			(1 << 1)
+#define   GT_FIFO_IARDERR			(1 << 0)
+
+#define GTFIFOCTL				_MMIO(0x120008)
+#define   GT_FIFO_FREE_ENTRIES_MASK		0x7f
+#define   GT_FIFO_NUM_RESERVED_ENTRIES		20
+#define   GT_FIFO_CTL_BLOCK_ALL_POLICY_STALL	(1 << 12)
+#define   GT_FIFO_CTL_RC6_POLICY_STALL		(1 << 11)
+
+#define FORCEWAKE_MT_ACK			_MMIO(0x130040)
+#define FORCEWAKE_ACK_HSW			_MMIO(0x130044)
+#define FORCEWAKE_ACK_GT_GEN9			_MMIO(0x130044)
+#define   FORCEWAKE_KERNEL			BIT(0)
+#define   FORCEWAKE_USER			BIT(1)
+#define   FORCEWAKE_KERNEL_FALLBACK		BIT(15)
+#define FORCEWAKE_ACK				_MMIO(0x130090)
+#define VLV_GTLC_WAKE_CTRL			_MMIO(0x130090)
+#define   VLV_GTLC_RENDER_CTX_EXISTS		(1 << 25)
+#define   VLV_GTLC_MEDIA_CTX_EXISTS		(1 << 24)
+#define   VLV_GTLC_ALLOWWAKEREQ			(1 << 0)
+#define VLV_GTLC_PW_STATUS			_MMIO(0x130094)
+#define   VLV_GTLC_ALLOWWAKEACK			(1 << 0)
+#define   VLV_GTLC_ALLOWWAKEERR			(1 << 1)
+#define   VLV_GTLC_PW_MEDIA_STATUS_MASK		(1 << 5)
+#define   VLV_GTLC_PW_RENDER_STATUS_MASK	(1 << 7)
+#define VLV_GTLC_SURVIVABILITY_REG		_MMIO(0x130098)
+#define   VLV_GFX_CLK_STATUS_BIT		(1 << 3)
+#define   VLV_GFX_CLK_FORCE_ON_BIT		(1 << 2)
+#define FORCEWAKE_VLV				_MMIO(0x1300b0)
+#define FORCEWAKE_ACK_VLV			_MMIO(0x1300b4)
+#define FORCEWAKE_MEDIA_VLV			_MMIO(0x1300b8)
+#define FORCEWAKE_ACK_MEDIA_VLV			_MMIO(0x1300bc)
+
+#define GEN6_GT_THREAD_STATUS_REG		_MMIO(0x13805c)
+#define   GEN6_GT_THREAD_STATUS_CORE_MASK	0x7
+
+#define GEN6_GT_CORE_STATUS			_MMIO(0x138060)
+#define   GEN6_CORE_CPD_STATE_MASK		(7 << 4)
+#define   GEN6_RCn_MASK				7
+#define   GEN6_RC0				0
+#define   GEN6_RC3				2
+#define   GEN6_RC6				3
+#define   GEN6_RC7				4
+
+#define GEN8_GT_SLICE_INFO			_MMIO(0x138064)
+#define   GEN8_LSLICESTAT_MASK			0x7
 
-/* MOCS (Memory Object Control State) registers */
-#define GEN9_LNCFCMOCS(i)	_MMIO(0xb020 + (i) * 4)	/* L3 Cache Control */
-#define GEN9_LNCFCMOCS_REG_COUNT	32
-
-#define __GEN9_RCS0_MOCS0	0xc800
-#define GEN9_GFX_MOCS(i)	_MMIO(__GEN9_RCS0_MOCS0 + (i) * 4)
-#define __GEN9_VCS0_MOCS0	0xc900
-#define GEN9_MFX0_MOCS(i)	_MMIO(__GEN9_VCS0_MOCS0 + (i) * 4)
-#define __GEN9_VCS1_MOCS0	0xca00
-#define GEN9_MFX1_MOCS(i)	_MMIO(__GEN9_VCS1_MOCS0 + (i) * 4)
-#define __GEN9_VECS0_MOCS0	0xcb00
-#define GEN9_VEBOX_MOCS(i)	_MMIO(__GEN9_VECS0_MOCS0 + (i) * 4)
-#define __GEN9_BCS0_MOCS0	0xcc00
-#define GEN9_BLT_MOCS(i)	_MMIO(__GEN9_BCS0_MOCS0 + (i) * 4)
-#define __GEN11_VCS2_MOCS0	0x10000
-#define GEN11_MFX2_MOCS(i)	_MMIO(__GEN11_VCS2_MOCS0 + (i) * 4)
-
-#define GEN9_SCRATCH_LNCF1		_MMIO(0xb008)
-#define   GEN9_LNCF_NONIA_COHERENT_ATOMICS_ENABLE REG_BIT(0)
-
-#define GEN9_SCRATCH1			_MMIO(0xb11c)
-#define   EVICTION_PERF_FIX_ENABLE	REG_BIT(8)
-
-#define GEN10_SCRATCH_LNCF2		_MMIO(0xb0a0)
-#define   PMFLUSHDONE_LNICRSDROP	(1 << 20)
-#define   PMFLUSH_GAPL3UNBLOCK		(1 << 21)
-#define   PMFLUSHDONE_LNEBLK		(1 << 22)
-
-#define XEHP_L3NODEARBCFG		_MMIO(0xb0b4)
-#define   XEHP_LNESPARE			REG_BIT(19)
-
-#define GEN12_GLOBAL_MOCS(i)	_MMIO(0x4000 + (i) * 4) /* Global MOCS regs */
+#define GEN6_GT_GFX_RC6_LOCKED			_MMIO(0x138104)
+#define VLV_COUNTER_CONTROL			_MMIO(0x138104)
+#define   VLV_COUNT_RANGE_HIGH			(1 << 15)
+#define   VLV_MEDIA_RC0_COUNT_EN		(1 << 5)
+#define   VLV_RENDER_RC0_COUNT_EN		(1 << 4)
+#define   VLV_MEDIA_RC6_COUNT_EN		(1 << 1)
+#define   VLV_RENDER_RC6_COUNT_EN		(1 << 0)
+#define GEN6_GT_GFX_RC6				_MMIO(0x138108)
+#define VLV_GT_RENDER_RC6			_MMIO(0x138108)
+#define VLV_GT_MEDIA_RC6			_MMIO(0x13810c)
 
-/* gamt regs */
-#define GEN8_L3_LRA_1_GPGPU _MMIO(0x4dd4)
-#define   GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_BDW  0x67F1427F /* max/min for LRA1/2 */
-#define   GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_CHV  0x5FF101FF /* max/min for LRA1/2 */
-#define   GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_SKL  0x67F1427F /*    "        " */
-#define   GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_BXT  0x5FF101FF /*    "        " */
+#define GEN6_GT_GFX_RC6p			_MMIO(0x13810c)
+#define GEN6_GT_GFX_RC6pp			_MMIO(0x138110)
+#define VLV_RENDER_C0_COUNT			_MMIO(0x138118)
+#define VLV_MEDIA_C0_COUNT			_MMIO(0x13811c)
+
+#define GEN11_GT_INTR_DW(x)			_MMIO(0x190018 + ((x) * 4))
+#define   GEN11_CSME				(31)
+#define   GEN11_GUNIT				(28)
+#define   GEN11_GUC				(25)
+#define   GEN11_WDPERF				(20)
+#define   GEN11_KCR				(19)
+#define   GEN11_GTPM				(16)
+#define   GEN11_BCS				(15)
+#define   GEN11_RCS0				(0)
+#define   GEN11_VECS(x)				(31 - (x))
+#define   GEN11_VCS(x)				(x)
+
+#define GEN11_RENDER_COPY_INTR_ENABLE		_MMIO(0x190030)
+#define GEN11_VCS_VECS_INTR_ENABLE		_MMIO(0x190034)
+#define GEN11_GUC_SG_INTR_ENABLE		_MMIO(0x190038)
+#define   ENGINE1_MASK				REG_GENMASK(31, 16)
+#define   ENGINE0_MASK				REG_GENMASK(15, 0)
+#define GEN11_GPM_WGBOXPERF_INTR_ENABLE		_MMIO(0x19003c)
+#define GEN11_CRYPTO_RSVD_INTR_ENABLE		_MMIO(0x190040)
+#define GEN11_GUNIT_CSME_INTR_ENABLE		_MMIO(0x190044)
+
+#define GEN11_INTR_IDENTITY_REG(x)		_MMIO(0x190060 + ((x) * 4))
+#define   GEN11_INTR_DATA_VALID			(1 << 31)
+#define   GEN11_INTR_ENGINE_CLASS(x)		(((x) & GENMASK(18, 16)) >> 16)
+#define   GEN11_INTR_ENGINE_INSTANCE(x)		(((x) & GENMASK(25, 20)) >> 20)
+#define   GEN11_INTR_ENGINE_INTR(x)		((x) & 0xffff)
+/* irq instances for OTHER_CLASS */
+#define   OTHER_GUC_INSTANCE			0
+#define   OTHER_GTPM_INSTANCE			1
+#define   OTHER_KCR_INSTANCE			4
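
GEN11_INTR_IDENTITY_REG packs a valid bit, engine class, engine instance and the per-engine interrupt bits into one dword; the helper macros above unpack them. A standalone decode of a fabricated identity value (the shift-based macros below are equivalent restatements of the GENMASK-based ones above):

#include <stdio.h>
#include <stdint.h>

#define INTR_DATA_VALID(x)	(((x) >> 31) & 0x1)
#define INTR_ENGINE_CLASS(x)	(((x) >> 16) & 0x7)
#define INTR_ENGINE_INSTANCE(x)	(((x) >> 20) & 0x3f)
#define INTR_ENGINE_INTR(x)	((x) & 0xffff)

int main(void)
{
	uint32_t ident = 0x80110001;	/* fabricated example value */

	printf("valid=%u class=%u instance=%u intr=%#06x\n",
	       INTR_DATA_VALID(ident), INTR_ENGINE_CLASS(ident),
	       INTR_ENGINE_INSTANCE(ident), INTR_ENGINE_INTR(ident));
	return 0;
}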
+
+#define GEN11_IIR_REG_SELECTOR(x)		_MMIO(0x190070 + ((x) * 4))
+
+#define GEN11_RCS0_RSVD_INTR_MASK		_MMIO(0x190090)
+#define GEN11_BCS_RSVD_INTR_MASK		_MMIO(0x1900a0)
+#define GEN11_VCS0_VCS1_INTR_MASK		_MMIO(0x1900a8)
+#define GEN11_VCS2_VCS3_INTR_MASK		_MMIO(0x1900ac)
+#define GEN12_VCS4_VCS5_INTR_MASK		_MMIO(0x1900b0)
+#define GEN12_VCS6_VCS7_INTR_MASK		_MMIO(0x1900b4)
+#define GEN11_VECS0_VECS1_INTR_MASK		_MMIO(0x1900d0)
+#define GEN12_VECS2_VECS3_INTR_MASK		_MMIO(0x1900d4)
+#define GEN11_GUC_SG_INTR_MASK			_MMIO(0x1900e8)
+#define GEN11_GPM_WGBOXPERF_INTR_MASK		_MMIO(0x1900ec)
+#define GEN11_CRYPTO_RSVD_INTR_MASK		_MMIO(0x1900f0)
+#define GEN11_GUNIT_CSME_INTR_MASK		_MMIO(0x1900f4)
+
+#define GEN12_SFC_DONE(n)			_MMIO(0x1cc000 + (n) * 0x1000)
+
+enum {
+	INTEL_ADVANCED_CONTEXT = 0,
+	INTEL_LEGACY_32B_CONTEXT,
+	INTEL_ADVANCED_AD_CONTEXT,
+	INTEL_LEGACY_64B_CONTEXT
+};
 
-#define MMCD_MISC_CTRL		_MMIO(0x4ddc) /* skl+ */
-#define  MMCD_PCLA		(1 << 31)
-#define  MMCD_HOTSPOT_EN	(1 << 27)
+enum {
+	FAULT_AND_HANG = 0,
+	FAULT_AND_HALT, /* Debug only */
+	FAULT_AND_STREAM,
+	FAULT_AND_CONTINUE /* Unsupported */
+};
 
-#define SLICE_COMMON_ECO_CHICKEN1		_MMIO(0x731C)
-#define   MSC_MSAA_REODER_BUF_BYPASS_DISABLE	REG_BIT(14)
+#define   CTX_GTT_ADDRESS_MASK			GENMASK(31, 12)
+#define   GEN8_CTX_VALID			(1 << 0)
+#define   GEN8_CTX_FORCE_PD_RESTORE		(1 << 1)
+#define   GEN8_CTX_FORCE_RESTORE		(1 << 2)
+#define   GEN8_CTX_L3LLC_COHERENT		(1 << 5)
+#define   GEN8_CTX_PRIVILEGE			(1 << 8)
+#define   GEN8_CTX_ADDRESSING_MODE_SHIFT	3
+#define   GEN8_CTX_ID_SHIFT			32
+#define   GEN8_CTX_ID_WIDTH			21
+#define   GEN11_SW_CTX_ID_SHIFT			37
+#define   GEN11_SW_CTX_ID_WIDTH			11
+#define   GEN11_ENGINE_CLASS_SHIFT		61
+#define   GEN11_ENGINE_CLASS_WIDTH		3
+#define   GEN11_ENGINE_INSTANCE_SHIFT		48
+#define   GEN11_ENGINE_INSTANCE_WIDTH		6
+#define   XEHP_SW_CTX_ID_SHIFT			39
+#define   XEHP_SW_CTX_ID_WIDTH			16
+#define   XEHP_SW_COUNTER_SHIFT			58
+#define   XEHP_SW_COUNTER_WIDTH			6
 
 #endif /* __INTEL_GT_REGS__ */
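
The shift/width values at the end of the header describe the upper half of the GEN11+ 64-bit context descriptor: engine class at bit 61, engine instance at bit 48, software context ID at bit 37 (the XEHP variants relocate the ID and add a counter). A standalone sketch of packing just those three fields (the driver additionally ORs in valid, privilege and addressing-mode bits when it builds the real descriptor):

#include <stdio.h>
#include <stdint.h>

/* shifts copied from the context-descriptor fields above */
#define GEN11_SW_CTX_ID_SHIFT		37
#define GEN11_ENGINE_INSTANCE_SHIFT	48
#define GEN11_ENGINE_CLASS_SHIFT	61

static uint64_t pack_upper_desc(uint64_t ctx_id, uint64_t instance,
				uint64_t class)
{
	return (ctx_id << GEN11_SW_CTX_ID_SHIFT) |
	       (instance << GEN11_ENGINE_INSTANCE_SHIFT) |
	       (class << GEN11_ENGINE_CLASS_SHIFT);
}

int main(void)
{
	/* SW context id 5 on engine class 1 (video decode), instance 0 */
	printf("upper descriptor bits: %#018llx\n",
	       (unsigned long long)pack_upper_desc(5, 0, 1));
	return 0;
}
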
diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.c b/drivers/gpu/drm/i915/gt/intel_gtt.c
index b5d3cde08aac19d7555a3d48be78f35dde61d49e..49a8fb63e6e5fd300770ea94682d71981507ff3b 100644
--- a/drivers/gpu/drm/i915/gt/intel_gtt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gtt.c
@@ -10,6 +10,7 @@
 
 #include <drm/drm_cache.h>
 
+#include "gem/i915_gem_internal.h"
 #include "gem/i915_gem_lmem.h"
 #include "i915_trace.h"
 #include "intel_gt.h"
@@ -162,6 +163,9 @@ static void __i915_vm_release(struct work_struct *work)
 	struct i915_address_space *vm =
 		container_of(work, struct i915_address_space, release_work);
 
+	/* Synchronize async unbinds. */
+	i915_vma_resource_bind_dep_sync_all(vm);
+
 	vm->cleanup(vm);
 	i915_address_space_fini(vm);
 
@@ -190,6 +194,7 @@ void i915_address_space_init(struct i915_address_space *vm, int subclass)
 	if (!kref_read(&vm->resv_ref))
 		kref_init(&vm->resv_ref);
 
+	vm->pending_unbind = RB_ROOT_CACHED;
 	INIT_WORK(&vm->release_work, __i915_vm_release);
 	atomic_set(&vm->open, 1);
 
diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h b/drivers/gpu/drm/i915/gt/intel_gtt.h
index 177b42b935a12dbc065e45919c743bec0ff6ff53..8073438b67c83d73a8c53035cb6311496164cd7e 100644
--- a/drivers/gpu/drm/i915/gt/intel_gtt.h
+++ b/drivers/gpu/drm/i915/gt/intel_gtt.h
@@ -27,6 +27,7 @@
 
 #include "gt/intel_reset.h"
 #include "i915_selftest.h"
+#include "i915_vma_resource.h"
 #include "i915_vma_types.h"
 
 #define I915_GFP_ALLOW_FAIL (GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN)
@@ -200,7 +201,7 @@ struct i915_vma_ops {
 	/* Map an object into an address space with the given cache flags. */
 	void (*bind_vma)(struct i915_address_space *vm,
 			 struct i915_vm_pt_stash *stash,
-			 struct i915_vma *vma,
+			 struct i915_vma_resource *vma_res,
 			 enum i915_cache_level cache_level,
 			 u32 flags);
 	/*
@@ -208,7 +209,8 @@ struct i915_vma_ops {
 	 * setting the valid PTE entries to a reserved scratch page.
 	 */
 	void (*unbind_vma)(struct i915_address_space *vm,
-			   struct i915_vma *vma);
+			   struct i915_vma_resource *vma_res);
+
 };
 
 struct i915_address_space {
@@ -263,6 +265,9 @@ struct i915_address_space {
 	/* Flags used when creating page-table objects for this vm */
 	unsigned long lmem_pt_obj_flags;
 
+	/* Interval tree for pending unbind vma resources */
+	struct rb_root_cached pending_unbind;
+
 	struct drm_i915_gem_object *
 		(*alloc_pt_dma)(struct i915_address_space *vm, int sz);
 	struct drm_i915_gem_object *
@@ -285,7 +290,7 @@ struct i915_address_space {
 			    enum i915_cache_level cache_level,
 			    u32 flags);
 	void (*insert_entries)(struct i915_address_space *vm,
-			       struct i915_vma *vma,
+			       struct i915_vma_resource *vma_res,
 			       enum i915_cache_level cache_level,
 			       u32 flags);
 	void (*cleanup)(struct i915_address_space *vm);
@@ -600,11 +605,11 @@ void gen6_ggtt_invalidate(struct i915_ggtt *ggtt);
 
 void ppgtt_bind_vma(struct i915_address_space *vm,
 		    struct i915_vm_pt_stash *stash,
-		    struct i915_vma *vma,
+		    struct i915_vma_resource *vma_res,
 		    enum i915_cache_level cache_level,
 		    u32 flags);
 void ppgtt_unbind_vma(struct i915_address_space *vm,
-		      struct i915_vma *vma);
+		      struct i915_vma_resource *vma_res);
 
 void gtt_write_workarounds(struct intel_gt *gt);
 
@@ -627,8 +632,8 @@ __vm_create_scratch_for_read_pinned(struct i915_address_space *vm, unsigned long
 static inline struct sgt_dma {
 	struct scatterlist *sg;
 	dma_addr_t dma, max;
-} sgt_dma(struct i915_vma *vma) {
-	struct scatterlist *sg = vma->pages->sgl;
+} sgt_dma(struct i915_vma_resource *vma_res) {
+	struct scatterlist *sg = vma_res->bi.pages->sgl;
 	dma_addr_t addr = sg_dma_address(sg);
 
 	return (struct sgt_dma){ sg, addr, addr + sg_dma_len(sg) };
diff --git a/drivers/gpu/drm/i915/gt/intel_llc.c b/drivers/gpu/drm/i915/gt/intel_llc.c
index 335c65758d6f58e4f99c87b94db407fbf7b8514f..40e2e28ee6c757210348bdf676bdfc0a72532724 100644
--- a/drivers/gpu/drm/i915/gt/intel_llc.c
+++ b/drivers/gpu/drm/i915/gt/intel_llc.c
@@ -10,6 +10,7 @@
 #include "i915_reg.h"
 #include "intel_gt.h"
 #include "intel_llc.h"
+#include "intel_mchbar_regs.h"
 #include "intel_pcode.h"
 
 struct ia_constants {
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index e46bf1429f2cd981c0ab01e8a7d4bb38ce4ce8af..004e1216e65470f05496366c756c49f8c91e48a0 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -8,6 +8,8 @@
 #include "gen8_engine_cs.h"
 #include "i915_drv.h"
 #include "i915_perf.h"
+#include "i915_reg.h"
+#include "intel_context.h"
 #include "intel_engine.h"
 #include "intel_engine_regs.h"
 #include "intel_gpu_commands.h"
@@ -1067,6 +1069,10 @@ lrc_pin(struct intel_context *ce,
 
 void lrc_unpin(struct intel_context *ce)
 {
+	if (unlikely(ce->parallel.last_rq)) {
+		i915_request_put(ce->parallel.last_rq);
+		ce->parallel.last_rq = NULL;
+	}
 	check_redzone((void *)ce->lrc_reg_state - LRC_STATE_OFFSET,
 		      ce->engine);
 }
@@ -1162,6 +1168,29 @@ gen12_emit_cmd_buf_wa(const struct intel_context *ce, u32 *cs)
 	return cs;
 }
 
+/*
+ * On DG2, an RCS restore hang is detected during context restore of a
+ * preempted context in GPGPU mode. This is extremely timing dependent, so
+ * the SW workaround batch buffer (wabb) below is emitted for DG2 A steppings.
+ */
+static u32 *
+dg2_emit_rcs_hang_wabb(const struct intel_context *ce, u32 *cs)
+{
+	*cs++ = MI_LOAD_REGISTER_IMM(1);
+	*cs++ = i915_mmio_reg_offset(GEN12_STATE_ACK_DEBUG);
+	*cs++ = 0x21;
+
+	*cs++ = MI_LOAD_REGISTER_REG;
+	*cs++ = i915_mmio_reg_offset(RING_NOPID(ce->engine->mmio_base));
+	*cs++ = i915_mmio_reg_offset(GEN12_CULLBIT1);
+
+	*cs++ = MI_LOAD_REGISTER_REG;
+	*cs++ = i915_mmio_reg_offset(RING_NOPID(ce->engine->mmio_base));
+	*cs++ = i915_mmio_reg_offset(GEN12_CULLBIT2);
+
+	return cs;
+}
+
 static u32 *
 gen12_emit_indirect_ctx_rcs(const struct intel_context *ce, u32 *cs)
 {
@@ -1169,6 +1198,11 @@ gen12_emit_indirect_ctx_rcs(const struct intel_context *ce, u32 *cs)
 	cs = gen12_emit_cmd_buf_wa(ce, cs);
 	cs = gen12_emit_restore_scratch(ce, cs);
 
+	/* Wa_22011450934:dg2 */
+	if (IS_DG2_GRAPHICS_STEP(ce->engine->i915, G10, STEP_A0, STEP_B0) ||
+	    IS_DG2_GRAPHICS_STEP(ce->engine->i915, G11, STEP_A0, STEP_B0))
+		cs = dg2_emit_rcs_hang_wabb(ce, cs);
+
 	/* Wa_16013000631:dg2 */
 	if (IS_DG2_GRAPHICS_STEP(ce->engine->i915, G10, STEP_B0, STEP_C0) ||
 	    IS_DG2_G11(ce->engine->i915))
@@ -1688,6 +1722,17 @@ static void st_update_runtime_underflow(struct intel_context *ce, s32 dt)
 #endif
 }
 
+static u32 lrc_get_runtime(const struct intel_context *ce)
+{
+	/*
+	 * We can either use ppHWSP[16], which is recorded before the context
+	 * switch (and so excludes the cost of context switches), or the
+	 * value from the context image itself, which is saved/restored earlier
+	 * and so includes the cost of the save.
+	 */
+	return READ_ONCE(ce->lrc_reg_state[CTX_TIMESTAMP]);
+}
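
CTX_TIMESTAMP is a 32-bit counter, so the accumulated runtime has to tolerate wraparound. A standalone sketch of the accumulation idea behind lrc_update_runtime(), without the driver's types or its underflow bookkeeping:

#include <stdio.h>
#include <stdint.h>

struct runtime {
	uint32_t last;	/* previous CTX_TIMESTAMP sample */
	uint64_t total;	/* accumulated ticks */
};

static void runtime_update(struct runtime *rt, uint32_t now)
{
	/* unsigned subtraction absorbs a single 32-bit wrap */
	rt->total += (uint32_t)(now - rt->last);
	rt->last = now;
}

int main(void)
{
	struct runtime rt = { .last = 0xfffffff0 };

	runtime_update(&rt, 0x10);	/* sample taken after a wrap */
	printf("accumulated %llu ticks\n", (unsigned long long)rt.total);
	return 0;
}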
+
 void lrc_update_runtime(struct intel_context *ce)
 {
 	u32 old;
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.h b/drivers/gpu/drm/i915/gt/intel_lrc.h
index 7f697845c4cf6d6da3b1c99ed59e2db8b0d73186..0b76f096b559fc4df180be7b40f48ebc74e80b1c 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.h
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.h
@@ -8,12 +8,12 @@
 
 #include <linux/types.h>
 
-#include "intel_context.h"
-#include "intel_lrc_reg.h"
-
 struct drm_i915_gem_object;
+struct i915_gem_ww_ctx;
+struct intel_context;
 struct intel_engine_cs;
 struct intel_ring;
+struct kref;
 
 /* At the start of the context image is its per-process HWS page */
 #define LRC_PPHWSP_PN	(0)
@@ -68,15 +68,5 @@ void lrc_check_regs(const struct intel_context *ce,
 		    const char *when);
 
 void lrc_update_runtime(struct intel_context *ce);
-static inline u32 lrc_get_runtime(const struct intel_context *ce)
-{
-	/*
-	 * We can use either ppHWSP[16] which is recorded before the context
-	 * switch (and so excludes the cost of context switches) or use the
-	 * value from the context image itself, which is saved/restored earlier
-	 * and so includes the cost of the save.
-	 */
-	return READ_ONCE(ce->lrc_reg_state[CTX_TIMESTAMP]);
-}
 
 #endif /* __INTEL_LRC_H__ */
diff --git a/drivers/gpu/drm/i915/gt/intel_ppgtt.c b/drivers/gpu/drm/i915/gt/intel_ppgtt.c
index 083b3090c69c7b21e3a60c785ecbc97599774fa6..48e6e2f877007f9607025457256af0059451c80e 100644
--- a/drivers/gpu/drm/i915/gt/intel_ppgtt.c
+++ b/drivers/gpu/drm/i915/gt/intel_ppgtt.c
@@ -179,32 +179,34 @@ struct i915_ppgtt *i915_ppgtt_create(struct intel_gt *gt,
 
 void ppgtt_bind_vma(struct i915_address_space *vm,
 		    struct i915_vm_pt_stash *stash,
-		    struct i915_vma *vma,
+		    struct i915_vma_resource *vma_res,
 		    enum i915_cache_level cache_level,
 		    u32 flags)
 {
 	u32 pte_flags;
 
-	if (!test_bit(I915_VMA_ALLOC_BIT, __i915_vma_flags(vma))) {
-		vm->allocate_va_range(vm, stash, vma->node.start, vma->size);
-		set_bit(I915_VMA_ALLOC_BIT, __i915_vma_flags(vma));
+	if (!vma_res->allocated) {
+		vm->allocate_va_range(vm, stash, vma_res->start,
+				      vma_res->vma_size);
+		vma_res->allocated = true;
 	}
 
 	/* Applicable to VLV, and gen8+ */
 	pte_flags = 0;
-	if (i915_gem_object_is_readonly(vma->obj))
+	if (vma_res->bi.readonly)
 		pte_flags |= PTE_READ_ONLY;
-	if (i915_gem_object_is_lmem(vma->obj))
+	if (vma_res->bi.lmem)
 		pte_flags |= PTE_LM;
 
-	vm->insert_entries(vm, vma, cache_level, pte_flags);
+	vm->insert_entries(vm, vma_res, cache_level, pte_flags);
 	wmb();
 }
 
-void ppgtt_unbind_vma(struct i915_address_space *vm, struct i915_vma *vma)
+void ppgtt_unbind_vma(struct i915_address_space *vm,
+		      struct i915_vma_resource *vma_res)
 {
-	if (test_and_clear_bit(I915_VMA_ALLOC_BIT, __i915_vma_flags(vma)))
-		vm->clear_range(vm, vma->node.start, vma->size);
+	if (vma_res->allocated)
+		vm->clear_range(vm, vma_res->start, vma_res->vma_size);
 }
 
 static unsigned long pd_count(u64 size, int shift)
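
The vma_res conversion above works because everything the bind/unbind paths touch is snapshotted into the resource, so unbinding no longer needs to dereference the vma or its backing object. A hedged, abridged sketch of just the fields these hunks consume (the real struct i915_vma_resource carries much more):

/* Abridged for illustration; see the real type for the full layout. */
struct example_vma_resource {
	u64 start;		/* GTT offset, was vma->node.start */
	u64 vma_size;		/* was vma->size */
	bool allocated;		/* replaces the I915_VMA_ALLOC_BIT vma flag */
	struct {
		bool readonly;	/* was i915_gem_object_is_readonly(vma->obj) */
		bool lmem;	/* was i915_gem_object_is_lmem(vma->obj) */
	} bi;			/* bind info */
};
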
diff --git a/drivers/gpu/drm/i915/gt/intel_region_lmem.c b/drivers/gpu/drm/i915/gt/intel_region_lmem.c
index cb5a67c98f300d8c7ee31dd4650a1860b1b07699..a04e0cf4a94b9ce914a1aaf62cd131b8dafe40e0 100644
--- a/drivers/gpu/drm/i915/gt/intel_region_lmem.c
+++ b/drivers/gpu/drm/i915/gt/intel_region_lmem.c
@@ -12,11 +12,12 @@
 #include "gem/i915_gem_region.h"
 #include "gem/i915_gem_ttm.h"
 #include "gt/intel_gt.h"
+#include "gt/intel_gt_regs.h"
 
 static int init_fake_lmem_bar(struct intel_memory_region *mem)
 {
 	struct drm_i915_private *i915 = mem->i915;
-	struct i915_ggtt *ggtt = &i915->ggtt;
+	struct i915_ggtt *ggtt = to_gt(i915)->ggtt;
 	unsigned long n;
 	int ret;
 
@@ -132,7 +133,7 @@ intel_gt_setup_fake_lmem(struct intel_gt *gt)
 	if (!i915->params.fake_lmem_start)
 		return ERR_PTR(-ENODEV);
 
-	GEM_BUG_ON(i915_ggtt_has_aperture(&i915->ggtt));
+	GEM_BUG_ON(i915_ggtt_has_aperture(to_gt(i915)->ggtt));
 
 	/* Your mappable aperture belongs to me now! */
 	mappable_end = pci_resource_len(pdev, 2);
diff --git a/drivers/gpu/drm/i915/gt/intel_renderstate.c b/drivers/gpu/drm/i915/gt/intel_renderstate.c
index b575cd6e0b7a8aa23545b8b84429f718cf4f7563..5121e6dc2fa53eecae78ca176d1ca3aa5c0d63d8 100644
--- a/drivers/gpu/drm/i915/gt/intel_renderstate.c
+++ b/drivers/gpu/drm/i915/gt/intel_renderstate.c
@@ -3,6 +3,8 @@
  * Copyright © 2014 Intel Corporation
  */
 
+#include "gem/i915_gem_internal.h"
+
 #include "i915_drv.h"
 #include "intel_renderstate.h"
 #include "intel_context.h"
diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c
index 59beb69ff6f215d0e2adb3db04ee82e8c5262278..82713264b96c11bec1031f656f5315b677cc746d 100644
--- a/drivers/gpu/drm/i915/gt/intel_reset.c
+++ b/drivers/gpu/drm/i915/gt/intel_reset.c
@@ -14,6 +14,7 @@
 #include "gt/intel_gt_regs.h"
 
 #include "i915_drv.h"
+#include "i915_file_private.h"
 #include "i915_gpu_error.h"
 #include "i915_irq.h"
 #include "intel_breadcrumbs.h"
@@ -22,6 +23,7 @@
 #include "intel_gt.h"
 #include "intel_gt_pm.h"
 #include "intel_gt_requests.h"
+#include "intel_mchbar_regs.h"
 #include "intel_pci_config.h"
 #include "intel_reset.h"
 
@@ -347,25 +349,25 @@ static void get_sfc_forced_lock_data(struct intel_engine_cs *engine,
 		MISSING_CASE(engine->class);
 		fallthrough;
 	case VIDEO_DECODE_CLASS:
-		sfc_lock->lock_reg = GEN11_VCS_SFC_FORCED_LOCK(engine);
+		sfc_lock->lock_reg = GEN11_VCS_SFC_FORCED_LOCK(engine->mmio_base);
 		sfc_lock->lock_bit = GEN11_VCS_SFC_FORCED_LOCK_BIT;
 
-		sfc_lock->ack_reg = GEN11_VCS_SFC_LOCK_STATUS(engine);
+		sfc_lock->ack_reg = GEN11_VCS_SFC_LOCK_STATUS(engine->mmio_base);
 		sfc_lock->ack_bit  = GEN11_VCS_SFC_LOCK_ACK_BIT;
 
-		sfc_lock->usage_reg = GEN11_VCS_SFC_LOCK_STATUS(engine);
+		sfc_lock->usage_reg = GEN11_VCS_SFC_LOCK_STATUS(engine->mmio_base);
 		sfc_lock->usage_bit = GEN11_VCS_SFC_USAGE_BIT;
 		sfc_lock->reset_bit = GEN11_VCS_SFC_RESET_BIT(engine->instance);
 
 		break;
 	case VIDEO_ENHANCEMENT_CLASS:
-		sfc_lock->lock_reg = GEN11_VECS_SFC_FORCED_LOCK(engine);
+		sfc_lock->lock_reg = GEN11_VECS_SFC_FORCED_LOCK(engine->mmio_base);
 		sfc_lock->lock_bit = GEN11_VECS_SFC_FORCED_LOCK_BIT;
 
-		sfc_lock->ack_reg = GEN11_VECS_SFC_LOCK_ACK(engine);
+		sfc_lock->ack_reg = GEN11_VECS_SFC_LOCK_ACK(engine->mmio_base);
 		sfc_lock->ack_bit  = GEN11_VECS_SFC_LOCK_ACK_BIT;
 
-		sfc_lock->usage_reg = GEN11_VECS_SFC_USAGE(engine);
+		sfc_lock->usage_reg = GEN11_VECS_SFC_USAGE(engine->mmio_base);
 		sfc_lock->usage_bit = GEN11_VECS_SFC_USAGE_BIT;
 		sfc_lock->reset_bit = GEN11_VECS_SFC_RESET_BIT(engine->instance);
 
@@ -412,7 +414,7 @@ static int gen11_lock_sfc(struct intel_engine_cs *engine,
 		 * forced lock on the VE engine that shares the same SFC.
 		 */
 		if (!(intel_uncore_read_fw(uncore,
-					   GEN12_HCP_SFC_LOCK_STATUS(engine)) &
+					   GEN12_HCP_SFC_LOCK_STATUS(engine->mmio_base)) &
 		      GEN12_HCP_SFC_USAGE_BIT))
 			return 0;
 
@@ -602,6 +604,15 @@ static int gen8_reset_engines(struct intel_gt *gt,
 		 */
 	}
 
+	/*
+	 * Wa_22011100796:dg2, whenever a full soft reset is required,
+	 * reset all individual engines first, and then do a full soft reset.
+	 *
+	 * This is best effort, so ignore any error from the initial reset.
+	 */
+	if (IS_DG2(gt->i915) && engine_mask == ALL_ENGINES)
+		gen11_reset_engines(gt, gt->info.engine_mask, 0);
+
 	if (GRAPHICS_VER(gt->i915) >= 11)
 		ret = gen11_reset_engines(gt, engine_mask, retry);
 	else
diff --git a/drivers/gpu/drm/i915/gt/intel_ring.c b/drivers/gpu/drm/i915/gt/intel_ring.c
index 723055340c9bc191ee8aa10ebb178e3c0093ea72..40ffcb94e3797c295347f2d0f24faed65b2664d3 100644
--- a/drivers/gpu/drm/i915/gt/intel_ring.c
+++ b/drivers/gpu/drm/i915/gt/intel_ring.c
@@ -3,6 +3,7 @@
  * Copyright © 2019 Intel Corporation
  */
 
+#include "gem/i915_gem_internal.h"
 #include "gem/i915_gem_lmem.h"
 #include "gem/i915_gem_object.h"
 
diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
index 8362eb09092e1afc1abcf6dd83906f7a961486fb..6d7ec3bf1f32f9a64e896da00213ca9f7a6695ed 100644
--- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
@@ -3,6 +3,10 @@
  * Copyright © 2008-2021 Intel Corporation
  */
 
+#include <drm/drm_cache.h>
+
+#include "gem/i915_gem_internal.h"
+
 #include "gen2_engine_cs.h"
 #include "gen6_engine_cs.h"
 #include "gen6_ppgtt.h"
diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c
index 0cb299cdfc8fec59f8ed383a37008c20cb07ba90..fd95449ed46dada35d5c8581d40e1b671a08367e 100644
--- a/drivers/gpu/drm/i915/gt/intel_rps.c
+++ b/drivers/gpu/drm/i915/gt/intel_rps.c
@@ -13,6 +13,7 @@
 #include "intel_gt_irq.h"
 #include "intel_gt_pm_irq.h"
 #include "intel_gt_regs.h"
+#include "intel_mchbar_regs.h"
 #include "intel_pcode.h"
 #include "intel_rps.h"
 #include "vlv_sideband.h"
diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.c b/drivers/gpu/drm/i915/gt/intel_timeline.c
index 438bbc7b8147435bb4c2e4aee13d954182651061..b9640212d6595060ae761cc4a01a7b11d2d9d5de 100644
--- a/drivers/gpu/drm/i915/gt/intel_timeline.c
+++ b/drivers/gpu/drm/i915/gt/intel_timeline.c
@@ -3,9 +3,12 @@
  * Copyright © 2016-2018 Intel Corporation
  */
 
-#include "i915_drv.h"
+#include <drm/drm_cache.h>
+
+#include "gem/i915_gem_internal.h"
 
 #include "i915_active.h"
+#include "i915_drv.h"
 #include "i915_syncmap.h"
 #include "intel_gt.h"
 #include "intel_ring.h"
diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c
index 59ad231188756fab59154177f796031da16b75b7..26038066e90bab047ff8ba67ea97011fd53f453f 100644
--- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
@@ -237,7 +237,7 @@ static void gen8_ctx_workarounds_init(struct intel_engine_cs *engine,
 	wa_masked_en(wal, INSTPM, INSTPM_FORCE_ORDERING);
 
 	/* WaDisableAsyncFlipPerfMode:bdw,chv */
-	wa_masked_en(wal, MI_MODE, ASYNC_FLIP_PERF_DISABLE);
+	wa_masked_en(wal, RING_MI_MODE(RENDER_RING_BASE), ASYNC_FLIP_PERF_DISABLE);
 
 	/* WaDisablePartialInstShootdown:bdw,chv */
 	wa_masked_en(wal, GEN8_ROW_CHICKEN,
@@ -1510,6 +1510,12 @@ dg2_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
 	 */
 	wa_write_or(wal, XEHP_L3SCQREG7, BLEND_FILL_CACHING_OPT_DIS);
 	wa_write_or(wal, GEN12_SQCM, EN_32B_ACCESS);
+
+	/* Wa_18018781329:dg2 */
+	wa_write_or(wal, RENDER_MOD_CTRL, FORCE_MISS_FTLB);
+	wa_write_or(wal, COMP_MOD_CTRL, FORCE_MISS_FTLB);
+	wa_write_or(wal, VDBX_MOD_CTRL, FORCE_MISS_FTLB);
+	wa_write_or(wal, VEBX_MOD_CTRL, FORCE_MISS_FTLB);
 }
 
 static void
@@ -2040,7 +2046,12 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
 {
 	struct drm_i915_private *i915 = engine->i915;
 
-	if (IS_DG2_GRAPHICS_STEP(engine->i915, G11, STEP_A0, STEP_B0)) {
+	if (IS_DG2(i915)) {
+		/* Wa_14015227452:dg2 */
+		wa_masked_en(wal, GEN9_ROW_CHICKEN4, XEHP_DIS_BBL_SYSPIPE);
+	}
+
+	if (IS_DG2_GRAPHICS_STEP(i915, G11, STEP_A0, STEP_B0)) {
 		/* Wa_14013392000:dg2_g11 */
 		wa_masked_en(wal, GEN7_ROW_CHICKEN2, GEN12_ENABLE_LARGE_GRF_MODE);
 
@@ -2048,15 +2059,15 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
 		wa_write_or(wal, LSC_CHICKEN_BIT_0_UDW, DIS_CHAIN_2XSIMD8);
 	}
 
-	if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_B0) ||
-	    IS_DG2_GRAPHICS_STEP(engine->i915, G11, STEP_A0, STEP_B0)) {
+	if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_B0) ||
+	    IS_DG2_GRAPHICS_STEP(i915, G11, STEP_A0, STEP_B0)) {
 		/* Wa_14012419201:dg2 */
 		wa_masked_en(wal, GEN9_ROW_CHICKEN4,
 			     GEN12_DISABLE_HDR_PAST_PAYLOAD_HOLD_FIX);
 	}
 
-	if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_B0, STEP_C0) ||
-	    IS_DG2_G11(engine->i915)) {
+	if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_C0) ||
+	    IS_DG2_G11(i915)) {
 		/*
 		 * Wa_22012826095:dg2
 		 * Wa_22013059131:dg2
@@ -2071,14 +2082,14 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
 	}
 
 	/* Wa_1308578152:dg2_g10 when first gslice is fused off */
-	if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_B0, STEP_C0) &&
+	if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_C0) &&
 	    needs_wa_1308578152(engine)) {
 		wa_masked_dis(wal, GEN12_CS_DEBUG_MODE1_CCCSUNIT_BE_COMMON,
 			      GEN12_REPLAY_MODE_GRANULARITY);
 	}
 
-	if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_B0, STEP_FOREVER) ||
-	    IS_DG2_G11(engine->i915)) {
+	if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_FOREVER) ||
+	    IS_DG2_G11(i915) || IS_DG2_G12(i915)) {
 		/* Wa_22013037850:dg2 */
 		wa_write_or(wal, LSC_CHICKEN_BIT_0_UDW,
 			    DISABLE_128B_EVICTION_COMMAND_UDW);
@@ -2095,7 +2106,7 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
 			      LSC_L1_FLUSH_CTL_3D_DATAPORT_FLUSH_EVENTS_MASK);
 	}
 
-	if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_B0)) {
+	if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_B0)) {
 		/*
 		 * Wa_1608949956:dg2_g10
 		 * Wa_14010198302:dg2_g10
@@ -2114,7 +2125,7 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
 		       0, false);
 	}
 
-	if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_B0)) {
+	if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_B0)) {
 		/* Wa_22010430635:dg2 */
 		wa_masked_en(wal,
 			     GEN9_ROW_CHICKEN4,
@@ -2124,8 +2135,8 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
 		wa_write_or(wal, XEHP_L3NODEARBCFG, XEHP_LNESPARE);
 	}
 
-	if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_C0) ||
-	    IS_DG2_G11(engine->i915)) {
+	if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_C0) ||
+	    IS_DG2_G11(i915)) {
 		/* Wa_22012654132:dg2 */
 		wa_add(wal, GEN10_CACHE_MODE_SS, 0,
 		       _MASKED_BIT_ENABLE(ENABLE_PREFETCH_INTO_IC),
@@ -2134,8 +2145,8 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
 	}
 
 	/* Wa_14013202645:dg2 */
-	if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_B0, STEP_C0) ||
-	    IS_DG2_GRAPHICS_STEP(engine->i915, G11, STEP_A0, STEP_B0))
+	if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_C0) ||
+	    IS_DG2_GRAPHICS_STEP(i915, G11, STEP_A0, STEP_B0))
 		wa_write_or(wal, RT_CTRL, DIS_NULL_QUERY);
 
 	if (IS_DG1_GRAPHICS_STEP(i915, STEP_A0, STEP_B0) ||
@@ -2463,7 +2474,7 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
 		 * WaDisableAsyncFlipPerfMode:snb,ivb,hsw,vlv
 		 */
 		wa_masked_en(wal,
-			     MI_MODE,
+			     RING_MI_MODE(RENDER_RING_BASE),
 			     ASYNC_FLIP_PERF_DISABLE);
 
 	if (GRAPHICS_VER(i915) == 6) {
@@ -2522,7 +2533,7 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
 
 	if (IS_GRAPHICS_VER(i915, 4, 6))
 		/* WaTimedSingleVertexDispatch:cl,bw,ctg,elk,ilk,snb */
-		wa_add(wal, MI_MODE,
+		wa_add(wal, RING_MI_MODE(RENDER_RING_BASE),
 		       0, _MASKED_BIT_ENABLE(VS_TIMER_DISPATCH),
 		       /* XXX bit doesn't stick on Broadwater */
 		       IS_I965G(i915) ? 0 : VS_TIMER_DISPATCH, true);
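
The MI_MODE to RING_MI_MODE(RENDER_RING_BASE) conversions above rely on the masked-register convention the wa_masked_* helpers encode: the upper 16 bits of the written value select which of the lower 16 bits take effect, so no read-modify-write is needed. A standalone sketch of that encoding (the bit position is illustrative, not taken from the register definition):

#include <stdint.h>
#include <stdio.h>

#define MASKED_BIT_ENABLE(a)	(((a) << 16) | (a))
#define MASKED_BIT_DISABLE(a)	((a) << 16)

int main(void)
{
	uint32_t bit = 1u << 14;	/* stand-in for ASYNC_FLIP_PERF_DISABLE */

	printf("enable:  %#010x\n", MASKED_BIT_ENABLE(bit));	/* 0x40004000 */
	printf("disable: %#010x\n", MASKED_BIT_DISABLE(bit));	/* 0x40000000 */
	return 0;
}
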
diff --git a/drivers/gpu/drm/i915/gt/selftest_execlists.c b/drivers/gpu/drm/i915/gt/selftest_execlists.c
index e10da897e07a419cc2ea4e6dcbd89e8a8a44a67c..72d5faab8f9a5581ad696dfc1fc0f5fd3f0244ad 100644
--- a/drivers/gpu/drm/i915/gt/selftest_execlists.c
+++ b/drivers/gpu/drm/i915/gt/selftest_execlists.c
@@ -5,6 +5,7 @@
 
 #include <linux/prime_numbers.h>
 
+#include "gem/i915_gem_internal.h"
 #include "gem/i915_gem_pm.h"
 #include "gt/intel_engine_heartbeat.h"
 #include "gt/intel_reset.h"
diff --git a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
index 4a20ba63446c5acb7d399e6868dbc1400f0984ef..83ff4c2e57c5039928ad6eec6a208c9407285b7f 100644
--- a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
+++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
@@ -6,6 +6,7 @@
 #include <linux/kthread.h>
 
 #include "gem/i915_gem_context.h"
+#include "gem/i915_gem_internal.h"
 
 #include "i915_gem_evict.h"
 #include "intel_gt.h"
@@ -1383,7 +1384,7 @@ static int evict_vma(void *data)
 	complete(&arg->completion);
 
 	mutex_lock(&vm->mutex);
-	err = i915_gem_evict_for_node(vm, &evict, 0);
+	err = i915_gem_evict_for_node(vm, NULL, &evict, 0);
 	mutex_unlock(&vm->mutex);
 
 	return err;
diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c
index 618c905daa19c4c84ae890ed79ab17f7adf4a59b..21c29d315cc0b7de45985313f9c2e644b5b24df1 100644
--- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
+++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
@@ -5,6 +5,8 @@
 
 #include <linux/prime_numbers.h>
 
+#include "gem/i915_gem_internal.h"
+
 #include "i915_selftest.h"
 #include "intel_engine_heartbeat.h"
 #include "intel_engine_pm.h"
diff --git a/drivers/gpu/drm/i915/gt/selftest_migrate.c b/drivers/gpu/drm/i915/gt/selftest_migrate.c
index fa4293d2944f1a8e891810f0c885980cdae75960..c9c4f391c5cc0ae714c143772280a680f9f2daf8 100644
--- a/drivers/gpu/drm/i915/gt/selftest_migrate.c
+++ b/drivers/gpu/drm/i915/gt/selftest_migrate.c
@@ -5,6 +5,8 @@
 
 #include <linux/sort.h>
 
+#include "gem/i915_gem_internal.h"
+
 #include "selftests/i915_random.h"
 
 static const unsigned int sizes[] = {
diff --git a/drivers/gpu/drm/i915/gt/selftest_reset.c b/drivers/gpu/drm/i915/gt/selftest_reset.c
index 8a873f6bda7fd9e9560eef42aade86563bc10def..37c38bdd5f474cf466a4c600789426780cb79d06 100644
--- a/drivers/gpu/drm/i915/gt/selftest_reset.c
+++ b/drivers/gpu/drm/i915/gt/selftest_reset.c
@@ -19,7 +19,7 @@ __igt_reset_stolen(struct intel_gt *gt,
 		   intel_engine_mask_t mask,
 		   const char *msg)
 {
-	struct i915_ggtt *ggtt = &gt->i915->ggtt;
+	struct i915_ggtt *ggtt = gt->ggtt;
 	const struct resource *dsm = &gt->i915->dsm;
 	resource_size_t num_pages, page;
 	struct intel_engine_cs *engine;
diff --git a/drivers/gpu/drm/i915/gt/selftest_rps.c b/drivers/gpu/drm/i915/gt/selftest_rps.c
index e1e5dd5f7638a1d2ae8a3f3750c9381bc285c5a4..6a69ac0184ad8cf5514b4f64e93a7a84a76f6acb 100644
--- a/drivers/gpu/drm/i915/gt/selftest_rps.c
+++ b/drivers/gpu/drm/i915/gt/selftest_rps.c
@@ -6,6 +6,8 @@
 #include <linux/pm_qos.h>
 #include <linux/sort.h>
 
+#include "gem/i915_gem_internal.h"
+
 #include "intel_engine_heartbeat.h"
 #include "intel_engine_pm.h"
 #include "intel_engine_regs.h"
diff --git a/drivers/gpu/drm/i915/gt/selftest_workarounds.c b/drivers/gpu/drm/i915/gt/selftest_workarounds.c
index 0287c2573c51bc03750380f5cba1abebe741e78a..67a9aab801ddf1ac5e2cfca3281808be2dbb7fa5 100644
--- a/drivers/gpu/drm/i915/gt/selftest_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/selftest_workarounds.c
@@ -3,6 +3,7 @@
  * Copyright © 2018 Intel Corporation
  */
 
+#include "gem/i915_gem_internal.h"
 #include "gem/i915_gem_pm.h"
 #include "gt/intel_engine_user.h"
 #include "gt/intel_gt.h"
diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h
index fe5d7d261797fa49cf1f5a71331d258ebecfb99e..7afdadc7656f1d83ef91ca6afd440c51acdccefb 100644
--- a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h
+++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h
@@ -7,9 +7,9 @@
 #define _ABI_GUC_ACTIONS_ABI_H
 
 /**
- * DOC: HOST2GUC_REGISTER_CTB
+ * DOC: HOST2GUC_SELF_CFG
  *
- * This message is used as part of the `CTB based communication`_ setup.
+ * This message is used by the host KMD to set up the `GuC Self Config KLVs`_.
  *
  * This message must be sent as `MMIO HXG Message`_.
  *
@@ -22,20 +22,18 @@
  *  |   +-------+--------------------------------------------------------------+
  *  |   | 27:16 | DATA0 = MBZ                                                  |
  *  |   +-------+--------------------------------------------------------------+
- *  |   |  15:0 | ACTION = _`GUC_ACTION_HOST2GUC_REGISTER_CTB` = 0x4505        |
+ *  |   |  15:0 | ACTION = _`GUC_ACTION_HOST2GUC_SELF_CFG` = 0x0508            |
  *  +---+-------+--------------------------------------------------------------+
- *  | 1 | 31:12 | RESERVED = MBZ                                               |
+ *  | 1 | 31:16 | **KLV_KEY** - KLV key, see `GuC Self Config KLVs`_           |
  *  |   +-------+--------------------------------------------------------------+
- *  |   |  11:8 | **TYPE** - type for the `CT Buffer`_                         |
+ *  |   |  15:0 | **KLV_LEN** - KLV length                                     |
  *  |   |       |                                                              |
- *  |   |       |   - _`GUC_CTB_TYPE_HOST2GUC` = 0                             |
- *  |   |       |   - _`GUC_CTB_TYPE_GUC2HOST` = 1                             |
- *  |   +-------+--------------------------------------------------------------+
- *  |   |   7:0 | **SIZE** - size of the `CT Buffer`_ in 4K units minus 1      |
+ *  |   |       |   - 32 bit KLV = 1                                           |
+ *  |   |       |   - 64 bit KLV = 2                                           |
  *  +---+-------+--------------------------------------------------------------+
- *  | 2 |  31:0 | **DESC_ADDR** - GGTT address of the `CTB Descriptor`_        |
+ *  | 2 |  31:0 | **VALUE32** - Bits 31-0 of the KLV value                     |
  *  +---+-------+--------------------------------------------------------------+
- *  | 3 |  31:0 | **BUFF_ADDF** - GGTT address of the `CT Buffer`_             |
+ *  | 3 |  31:0 | **VALUE64** - Bits 63-32 of the KLV value (**KLV_LEN** = 2)  |
  *  +---+-------+--------------------------------------------------------------+
  *
  *  +---+-------+--------------------------------------------------------------+
@@ -45,28 +43,25 @@
  *  |   +-------+--------------------------------------------------------------+
  *  |   | 30:28 | TYPE = GUC_HXG_TYPE_RESPONSE_SUCCESS_                        |
  *  |   +-------+--------------------------------------------------------------+
- *  |   |  27:0 | DATA0 = MBZ                                                  |
+ *  |   |  27:0 | DATA0 = **NUM** - 1 if KLV was parsed, 0 if not recognized   |
  *  +---+-------+--------------------------------------------------------------+
  */
-#define GUC_ACTION_HOST2GUC_REGISTER_CTB		0x4505
+#define GUC_ACTION_HOST2GUC_SELF_CFG			0x0508
 
-#define HOST2GUC_REGISTER_CTB_REQUEST_MSG_LEN		(GUC_HXG_REQUEST_MSG_MIN_LEN + 3u)
-#define HOST2GUC_REGISTER_CTB_REQUEST_MSG_0_MBZ		GUC_HXG_REQUEST_MSG_0_DATA0
-#define HOST2GUC_REGISTER_CTB_REQUEST_MSG_1_MBZ		(0xfffff << 12)
-#define HOST2GUC_REGISTER_CTB_REQUEST_MSG_1_TYPE	(0xf << 8)
-#define   GUC_CTB_TYPE_HOST2GUC				0u
-#define   GUC_CTB_TYPE_GUC2HOST				1u
-#define HOST2GUC_REGISTER_CTB_REQUEST_MSG_1_SIZE	(0xff << 0)
-#define HOST2GUC_REGISTER_CTB_REQUEST_MSG_2_DESC_ADDR	GUC_HXG_REQUEST_MSG_n_DATAn
-#define HOST2GUC_REGISTER_CTB_REQUEST_MSG_3_BUFF_ADDR	GUC_HXG_REQUEST_MSG_n_DATAn
+#define HOST2GUC_SELF_CFG_REQUEST_MSG_LEN		(GUC_HXG_REQUEST_MSG_MIN_LEN + 3u)
+#define HOST2GUC_SELF_CFG_REQUEST_MSG_0_MBZ		GUC_HXG_REQUEST_MSG_0_DATA0
+#define HOST2GUC_SELF_CFG_REQUEST_MSG_1_KLV_KEY		(0xffff << 16)
+#define HOST2GUC_SELF_CFG_REQUEST_MSG_1_KLV_LEN		(0xffff << 0)
+#define HOST2GUC_SELF_CFG_REQUEST_MSG_2_VALUE32		GUC_HXG_REQUEST_MSG_n_DATAn
+#define HOST2GUC_SELF_CFG_REQUEST_MSG_3_VALUE64		GUC_HXG_REQUEST_MSG_n_DATAn
 
-#define HOST2GUC_REGISTER_CTB_RESPONSE_MSG_LEN		GUC_HXG_RESPONSE_MSG_MIN_LEN
-#define HOST2GUC_REGISTER_CTB_RESPONSE_MSG_0_MBZ	GUC_HXG_RESPONSE_MSG_0_DATA0
+#define HOST2GUC_SELF_CFG_RESPONSE_MSG_LEN		GUC_HXG_RESPONSE_MSG_MIN_LEN
+#define HOST2GUC_SELF_CFG_RESPONSE_MSG_0_NUM		GUC_HXG_RESPONSE_MSG_0_DATA0
 
 /**
- * DOC: HOST2GUC_DEREGISTER_CTB
+ * DOC: HOST2GUC_CONTROL_CTB
  *
- * This message is used as part of the `CTB based communication`_ teardown.
+ * This H2G action allows the VF host to enable or disable the H2G and G2H `CT Buffer`_.
  *
  * This message must be sent as `MMIO HXG Message`_.
  *
@@ -79,15 +74,12 @@
  *  |   +-------+--------------------------------------------------------------+
  *  |   | 27:16 | DATA0 = MBZ                                                  |
  *  |   +-------+--------------------------------------------------------------+
- *  |   |  15:0 | ACTION = _`GUC_ACTION_HOST2GUC_DEREGISTER_CTB` = 0x4506      |
+ *  |   |  15:0 | ACTION = _`GUC_ACTION_HOST2GUC_CONTROL_CTB` = 0x4509         |
  *  +---+-------+--------------------------------------------------------------+
- *  | 1 | 31:12 | RESERVED = MBZ                                               |
- *  |   +-------+--------------------------------------------------------------+
- *  |   |  11:8 | **TYPE** - type of the `CT Buffer`_                          |
+ *  | 1 |  31:0 | **CONTROL** - control `CTB based communication`_             |
  *  |   |       |                                                              |
- *  |   |       | see `GUC_ACTION_HOST2GUC_REGISTER_CTB`_                      |
- *  |   +-------+--------------------------------------------------------------+
- *  |   |   7:0 | RESERVED = MBZ                                               |
+ *  |   |       |   - _`GUC_CTB_CONTROL_DISABLE` = 0                           |
+ *  |   |       |   - _`GUC_CTB_CONTROL_ENABLE` = 1                            |
  *  +---+-------+--------------------------------------------------------------+
  *
  *  +---+-------+--------------------------------------------------------------+
@@ -100,16 +92,16 @@
  *  |   |  27:0 | DATA0 = MBZ                                                  |
  *  +---+-------+--------------------------------------------------------------+
  */
-#define GUC_ACTION_HOST2GUC_DEREGISTER_CTB		0x4506
+#define GUC_ACTION_HOST2GUC_CONTROL_CTB			0x4509
 
-#define HOST2GUC_DEREGISTER_CTB_REQUEST_MSG_LEN		(GUC_HXG_REQUEST_MSG_MIN_LEN + 1u)
-#define HOST2GUC_DEREGISTER_CTB_REQUEST_MSG_0_MBZ	GUC_HXG_REQUEST_MSG_0_DATA0
-#define HOST2GUC_DEREGISTER_CTB_REQUEST_MSG_1_MBZ	(0xfffff << 12)
-#define HOST2GUC_DEREGISTER_CTB_REQUEST_MSG_1_TYPE	(0xf << 8)
-#define HOST2GUC_DEREGISTER_CTB_REQUEST_MSG_1_MBZ2	(0xff << 0)
+#define HOST2GUC_CONTROL_CTB_REQUEST_MSG_LEN		(GUC_HXG_REQUEST_MSG_MIN_LEN + 1u)
+#define HOST2GUC_CONTROL_CTB_REQUEST_MSG_0_MBZ		GUC_HXG_REQUEST_MSG_0_DATA0
+#define HOST2GUC_CONTROL_CTB_REQUEST_MSG_1_CONTROL	GUC_HXG_REQUEST_MSG_n_DATAn
+#define   GUC_CTB_CONTROL_DISABLE			0u
+#define   GUC_CTB_CONTROL_ENABLE			1u
 
-#define HOST2GUC_DEREGISTER_CTB_RESPONSE_MSG_LEN	GUC_HXG_RESPONSE_MSG_MIN_LEN
-#define HOST2GUC_DEREGISTER_CTB_RESPONSE_MSG_0_MBZ	GUC_HXG_RESPONSE_MSG_0_DATA0
+#define HOST2GUC_CONTROL_CTB_RESPONSE_MSG_LEN		GUC_HXG_RESPONSE_MSG_MIN_LEN
+#define HOST2GUC_CONTROL_CTB_RESPONSE_MSG_0_MBZ		GUC_HXG_RESPONSE_MSG_0_DATA0
 
 /* legacy definitions */
 
@@ -143,8 +135,12 @@ enum intel_guc_action {
 	INTEL_GUC_ACTION_DEREGISTER_COMMAND_TRANSPORT_BUFFER = 0x4506,
 	INTEL_GUC_ACTION_DEREGISTER_CONTEXT_DONE = 0x4600,
 	INTEL_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC = 0x4601,
-	INTEL_GUC_ACTION_RESET_CLIENT = 0x5507,
+	INTEL_GUC_ACTION_CLIENT_SOFT_RESET = 0x5507,
 	INTEL_GUC_ACTION_SET_ENG_UTIL_BUFF = 0x550A,
+	INTEL_GUC_ACTION_STATE_CAPTURE_NOTIFICATION = 0x8002,
+	INTEL_GUC_ACTION_NOTIFY_FLUSH_LOG_BUFFER_TO_FILE = 0x8003,
+	INTEL_GUC_ACTION_NOTIFY_CRASH_DUMP_POSTED = 0x8004,
+	INTEL_GUC_ACTION_NOTIFY_EXCEPTION = 0x8005,
 	INTEL_GUC_ACTION_LIMIT
 };
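
To make the SELF_CFG wire format documented above concrete, a standalone sketch that packs the four request dwords by hand, using plain shifts in place of the kernel's FIELD_PREP(); the GGTT address is made up:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint16_t key = 0x0902;	/* GUC_KLV_SELF_CFG_H2G_CTB_ADDR_KEY */
	uint16_t len = 2;	/* 64-bit KLV */
	uint64_t value = 0x180004000ull;	/* made-up GGTT address */

	uint32_t dw0 = 0x0508;	/* ORIGIN=HOST(0), TYPE=REQUEST(0), ACTION */
	uint32_t dw1 = ((uint32_t)key << 16) | len;	/* KLV_KEY | KLV_LEN */
	uint32_t dw2 = (uint32_t)value;			/* VALUE32, bits 31:0 */
	uint32_t dw3 = (uint32_t)(value >> 32);		/* VALUE64, bits 63:32 */

	printf("%08x %08x %08x %08x\n", dw0, dw1, dw2, dw3);
	return 0;
}
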
 
diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_errors_abi.h b/drivers/gpu/drm/i915/gt/uc/abi/guc_errors_abi.h
index 488b6061ee8939048efca960c40796cf4c68e24c..c20658ee85a51937c96f3d096bae87b18ce9d998 100644
--- a/drivers/gpu/drm/i915/gt/uc/abi/guc_errors_abi.h
+++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_errors_abi.h
@@ -11,4 +11,27 @@ enum intel_guc_response_status {
 	INTEL_GUC_RESPONSE_STATUS_GENERIC_FAIL = 0xF000,
 };
 
+enum intel_guc_load_status {
+	INTEL_GUC_LOAD_STATUS_DEFAULT                          = 0x00,
+	INTEL_GUC_LOAD_STATUS_START                            = 0x01,
+	INTEL_GUC_LOAD_STATUS_ERROR_DEVID_BUILD_MISMATCH       = 0x02,
+	INTEL_GUC_LOAD_STATUS_GUC_PREPROD_BUILD_MISMATCH       = 0x03,
+	INTEL_GUC_LOAD_STATUS_ERROR_DEVID_INVALID_GUCTYPE      = 0x04,
+	INTEL_GUC_LOAD_STATUS_GDT_DONE                         = 0x10,
+	INTEL_GUC_LOAD_STATUS_IDT_DONE                         = 0x20,
+	INTEL_GUC_LOAD_STATUS_LAPIC_DONE                       = 0x30,
+	INTEL_GUC_LOAD_STATUS_GUCINT_DONE                      = 0x40,
+	INTEL_GUC_LOAD_STATUS_DPC_READY                        = 0x50,
+	INTEL_GUC_LOAD_STATUS_DPC_ERROR                        = 0x60,
+	INTEL_GUC_LOAD_STATUS_EXCEPTION                        = 0x70,
+	INTEL_GUC_LOAD_STATUS_INIT_DATA_INVALID                = 0x71,
+	INTEL_GUC_LOAD_STATUS_PXP_TEARDOWN_CTRL_ENABLED        = 0x72,
+	INTEL_GUC_LOAD_STATUS_INVALID_INIT_DATA_RANGE_START,
+	INTEL_GUC_LOAD_STATUS_MPU_DATA_INVALID                 = 0x73,
+	INTEL_GUC_LOAD_STATUS_INIT_MMIO_SAVE_RESTORE_INVALID   = 0x74,
+	INTEL_GUC_LOAD_STATUS_INVALID_INIT_DATA_RANGE_END,
+
+	INTEL_GUC_LOAD_STATUS_READY                            = 0xF0,
+};
+
 #endif /* _ABI_GUC_ERRORS_ABI_H */
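
One plausible consumer of the new RANGE_START/RANGE_END markers, sketched here rather than taken from this patch, is a bounds check that classifies a reported load status without enumerating each member:

/* Hedged sketch, kernel-style C; not a helper this patch adds. */
static bool guc_load_status_is_invalid_init_data(u32 status)
{
	return status >= INTEL_GUC_LOAD_STATUS_INVALID_INIT_DATA_RANGE_START &&
	       status < INTEL_GUC_LOAD_STATUS_INVALID_INIT_DATA_RANGE_END;
}
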
diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_klvs_abi.h b/drivers/gpu/drm/i915/gt/uc/abi/guc_klvs_abi.h
new file mode 100644
index 0000000000000000000000000000000000000000..f0814a57c191eab423915da7d12afddae0e4b888
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_klvs_abi.h
@@ -0,0 +1,82 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#ifndef _ABI_GUC_KLVS_ABI_H
+#define _ABI_GUC_KLVS_ABI_H
+
+/**
+ * DOC: GuC KLV
+ *
+ *  +---+-------+--------------------------------------------------------------+
+ *  |   | Bits  | Description                                                  |
+ *  +===+=======+==============================================================+
+ *  | 0 | 31:16 | **KEY** - KLV key identifier                                 |
+ *  |   |       |   - `GuC Self Config KLVs`_                                  |
+ *  |   |       |                                                              |
+ *  |   +-------+--------------------------------------------------------------+
+ *  |   |  15:0 | **LEN** - length of VALUE (in 32bit dwords)                  |
+ *  +---+-------+--------------------------------------------------------------+
+ *  | 1 |  31:0 | **VALUE** - actual value of the KLV (format depends on KEY)  |
+ *  +---+-------+                                                              |
+ *  |...|       |                                                              |
+ *  +---+-------+                                                              |
+ *  | n |  31:0 |                                                              |
+ *  +---+-------+--------------------------------------------------------------+
+ */
+
+#define GUC_KLV_LEN_MIN				1u
+#define GUC_KLV_0_KEY				(0xffff << 16)
+#define GUC_KLV_0_LEN				(0xffff << 0)
+#define GUC_KLV_n_VALUE				(0xffffffff << 0)
+
+/**
+ * DOC: GuC Self Config KLVs
+ *
+ * `GuC KLV`_ keys available for use with HOST2GUC_SELF_CFG_.
+ *
+ * _`GUC_KLV_SELF_CFG_H2G_CTB_ADDR` : 0x0902
+ *      Refers to 64 bit Global Gfx address of H2G `CT Buffer`_.
+ *      Should be above WOPCM address but below APIC base address for native mode.
+ *
+ * _`GUC_KLV_SELF_CFG_H2G_CTB_DESCRIPTOR_ADDR` : 0x0903
+ *      Refers to 64 bit Global Gfx address of H2G `CTB Descriptor`_.
+ *      Should be above WOPCM address but below APIC base address for native mode.
+ *
+ * _`GUC_KLV_SELF_CFG_H2G_CTB_SIZE` : 0x0904
+ *      Refers to size of H2G `CT Buffer`_ in bytes.
+ *      Should be a multiple of 4K.
+ *
+ * _`GUC_KLV_SELF_CFG_G2H_CTB_ADDR` : 0x0905
+ *      Refers to 64 bit Global Gfx address of G2H `CT Buffer`_.
+ *      Should be above WOPCM address but below APIC base address for native mode.
+ *
+ * _`GUC_KLV_SELF_CFG_G2H_CTB_DESCRIPTOR_ADDR` : 0x0906
+ *      Refers to 64 bit Global Gfx address of G2H `CTB Descriptor`_.
+ *      Should be above WOPCM address but below APIC base address for native mode.
+ *
+ * _`GUC_KLV_SELF_CFG_G2H_CTB_SIZE` : 0x0907
+ *      Refers to size of G2H `CT Buffer`_ in bytes.
+ *      Should be a multiple of 4K.
+ */
+
+#define GUC_KLV_SELF_CFG_H2G_CTB_ADDR_KEY		0x0902
+#define GUC_KLV_SELF_CFG_H2G_CTB_ADDR_LEN		2u
+
+#define GUC_KLV_SELF_CFG_H2G_CTB_DESCRIPTOR_ADDR_KEY	0x0903
+#define GUC_KLV_SELF_CFG_H2G_CTB_DESCRIPTOR_ADDR_LEN	2u
+
+#define GUC_KLV_SELF_CFG_H2G_CTB_SIZE_KEY		0x0904
+#define GUC_KLV_SELF_CFG_H2G_CTB_SIZE_LEN		1u
+
+#define GUC_KLV_SELF_CFG_G2H_CTB_ADDR_KEY		0x0905
+#define GUC_KLV_SELF_CFG_G2H_CTB_ADDR_LEN		2u
+
+#define GUC_KLV_SELF_CFG_G2H_CTB_DESCRIPTOR_ADDR_KEY	0x0906
+#define GUC_KLV_SELF_CFG_G2H_CTB_DESCRIPTOR_ADDR_LEN	2u
+
+#define GUC_KLV_SELF_CFG_G2H_CTB_SIZE_KEY		0x0907
+#define GUC_KLV_SELF_CFG_G2H_CTB_SIZE_LEN		1u
+
+#endif /* _ABI_GUC_KLVS_ABI_H */
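
A hedged sketch of walking a KLV stream using the header layout documented above (kernel-style C, assuming <linux/bitfield.h> for FIELD_GET; not a helper this patch adds):

static void example_walk_klvs(const u32 *klvs, u32 num_dwords)
{
	while (num_dwords >= GUC_KLV_LEN_MIN) {
		u32 key = FIELD_GET(GUC_KLV_0_KEY, klvs[0]);
		u32 len = FIELD_GET(GUC_KLV_0_LEN, klvs[0]);

		if (1 + len > num_dwords)
			break;	/* truncated stream */

		/* klvs[1]..klvs[len] hold the VALUE dwords for this KEY */
		pr_info("klv key %#06x len %u\n", key, len);

		klvs += 1 + len;
		num_dwords -= 1 + len;
	}
}
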
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc.c
index 5bab32fef120559fdcbd61fea2424e3bedb73716..447a976c9f25bec5b64101046de51bf950e3245b 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c
@@ -184,6 +184,9 @@ void intel_guc_init_early(struct intel_guc *guc)
 		guc->send_regs.count = GUC_MAX_MMIO_MSG_LEN;
 		BUILD_BUG_ON(GUC_MAX_MMIO_MSG_LEN > SOFT_SCRATCH_COUNT);
 	}
+
+	intel_guc_enable_msg(guc, INTEL_GUC_RECV_MSG_EXCEPTION |
+				  INTEL_GUC_RECV_MSG_CRASH_DUMP_POSTED);
 }
 
 void intel_guc_init_late(struct intel_guc *guc)
@@ -224,32 +227,48 @@ static u32 guc_ctl_log_params_flags(struct intel_guc *guc)
 	u32 flags;
 
 	#if (((CRASH_BUFFER_SIZE) % SZ_1M) == 0)
-	#define UNIT SZ_1M
-	#define FLAG GUC_LOG_ALLOC_IN_MEGABYTE
+	#define LOG_UNIT SZ_1M
+	#define LOG_FLAG GUC_LOG_LOG_ALLOC_UNITS
 	#else
-	#define UNIT SZ_4K
-	#define FLAG 0
+	#define LOG_UNIT SZ_4K
+	#define LOG_FLAG 0
+	#endif
+
+	#if (((CAPTURE_BUFFER_SIZE) % SZ_1M) == 0)
+	#define CAPTURE_UNIT SZ_1M
+	#define CAPTURE_FLAG GUC_LOG_CAPTURE_ALLOC_UNITS
+	#else
+	#define CAPTURE_UNIT SZ_4K
+	#define CAPTURE_FLAG 0
 	#endif
 
 	BUILD_BUG_ON(!CRASH_BUFFER_SIZE);
-	BUILD_BUG_ON(!IS_ALIGNED(CRASH_BUFFER_SIZE, UNIT));
+	BUILD_BUG_ON(!IS_ALIGNED(CRASH_BUFFER_SIZE, LOG_UNIT));
 	BUILD_BUG_ON(!DEBUG_BUFFER_SIZE);
-	BUILD_BUG_ON(!IS_ALIGNED(DEBUG_BUFFER_SIZE, UNIT));
+	BUILD_BUG_ON(!IS_ALIGNED(DEBUG_BUFFER_SIZE, LOG_UNIT));
+	BUILD_BUG_ON(!CAPTURE_BUFFER_SIZE);
+	BUILD_BUG_ON(!IS_ALIGNED(CAPTURE_BUFFER_SIZE, CAPTURE_UNIT));
 
-	BUILD_BUG_ON((CRASH_BUFFER_SIZE / UNIT - 1) >
+	BUILD_BUG_ON((CRASH_BUFFER_SIZE / LOG_UNIT - 1) >
 			(GUC_LOG_CRASH_MASK >> GUC_LOG_CRASH_SHIFT));
-	BUILD_BUG_ON((DEBUG_BUFFER_SIZE / UNIT - 1) >
+	BUILD_BUG_ON((DEBUG_BUFFER_SIZE / LOG_UNIT - 1) >
 			(GUC_LOG_DEBUG_MASK >> GUC_LOG_DEBUG_SHIFT));
+	BUILD_BUG_ON((CAPTURE_BUFFER_SIZE / CAPTURE_UNIT - 1) >
+			(GUC_LOG_CAPTURE_MASK >> GUC_LOG_CAPTURE_SHIFT));
 
 	flags = GUC_LOG_VALID |
 		GUC_LOG_NOTIFY_ON_HALF_FULL |
-		FLAG |
-		((CRASH_BUFFER_SIZE / UNIT - 1) << GUC_LOG_CRASH_SHIFT) |
-		((DEBUG_BUFFER_SIZE / UNIT - 1) << GUC_LOG_DEBUG_SHIFT) |
+		CAPTURE_FLAG |
+		LOG_FLAG |
+		((CRASH_BUFFER_SIZE / LOG_UNIT - 1) << GUC_LOG_CRASH_SHIFT) |
+		((DEBUG_BUFFER_SIZE / LOG_UNIT - 1) << GUC_LOG_DEBUG_SHIFT) |
+		((CAPTURE_BUFFER_SIZE / CAPTURE_UNIT - 1) << GUC_LOG_CAPTURE_SHIFT) |
 		(offset << GUC_LOG_BUF_ADDR_SHIFT);
 
-	#undef UNIT
-	#undef FLAG
+	#undef LOG_UNIT
+	#undef LOG_FLAG
+	#undef CAPTURE_UNIT
+	#undef CAPTURE_FLAG
 
 	return flags;
 }
@@ -262,6 +281,26 @@ static u32 guc_ctl_ads_flags(struct intel_guc *guc)
 	return flags;
 }
 
+static u32 guc_ctl_wa_flags(struct intel_guc *guc)
+{
+	struct intel_gt *gt = guc_to_gt(guc);
+	u32 flags = 0;
+
+	/* Wa_22012773006:gen11,gen12 < XeHP */
+	if (GRAPHICS_VER(gt->i915) >= 11 &&
+	    GRAPHICS_VER_FULL(gt->i915) < IP_VER(12, 50))
+		flags |= GUC_WA_POLLCS;
+
+	return flags;
+}
+
+static u32 guc_ctl_devid(struct intel_guc *guc)
+{
+	struct drm_i915_private *i915 = guc_to_gt(guc)->i915;
+
+	return (INTEL_DEVID(i915) << 16) | INTEL_REVID(i915);
+}
+
 /*
  * Initialise the GuC parameter block before starting the firmware
  * transfer. These parameters are read by the firmware on startup
@@ -278,6 +317,8 @@ static void guc_init_params(struct intel_guc *guc)
 	params[GUC_CTL_FEATURE] = guc_ctl_feature_flags(guc);
 	params[GUC_CTL_DEBUG] = guc_ctl_debug_flags(guc);
 	params[GUC_CTL_ADS] = guc_ctl_ads_flags(guc);
+	params[GUC_CTL_WA] = guc_ctl_wa_flags(guc);
+	params[GUC_CTL_DEVID] = guc_ctl_devid(guc);
 
 	for (i = 0; i < GUC_CTL_MAX_DWORDS; i++)
 		DRM_DEBUG_DRIVER("param[%2d] = %#x\n", i, params[i]);
@@ -515,9 +556,10 @@ int intel_guc_to_host_process_recv_msg(struct intel_guc *guc,
 	/* Make sure to handle only enabled messages */
 	msg = payload[0] & guc->msg_enabled_mask;
 
-	if (msg & (INTEL_GUC_RECV_MSG_FLUSH_LOG_BUFFER |
-		   INTEL_GUC_RECV_MSG_CRASH_DUMP_POSTED))
-		intel_guc_log_handle_flush_event(&guc->log);
+	if (msg & INTEL_GUC_RECV_MSG_CRASH_DUMP_POSTED)
+		drm_err(&guc_to_gt(guc)->i915->drm, "Received early GuC crash dump notification!\n");
+	if (msg & INTEL_GUC_RECV_MSG_EXCEPTION)
+		drm_err(&guc_to_gt(guc)->i915->drm, "Received early GuC exception notification!\n");
 
 	return 0;
 }
@@ -551,7 +593,7 @@ int intel_guc_suspend(struct intel_guc *guc)
 {
 	int ret;
 	u32 action[] = {
-		INTEL_GUC_ACTION_RESET_CLIENT,
+		INTEL_GUC_ACTION_CLIENT_SOFT_RESET,
 	};
 
 	if (!intel_guc_is_ready(guc))
@@ -715,6 +757,56 @@ int intel_guc_allocate_and_map_vma(struct intel_guc *guc, u32 size,
 	return 0;
 }
 
+static int __guc_action_self_cfg(struct intel_guc *guc, u16 key, u16 len, u64 value)
+{
+	u32 request[HOST2GUC_SELF_CFG_REQUEST_MSG_LEN] = {
+		FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) |
+		FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) |
+		FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION, GUC_ACTION_HOST2GUC_SELF_CFG),
+		FIELD_PREP(HOST2GUC_SELF_CFG_REQUEST_MSG_1_KLV_KEY, key) |
+		FIELD_PREP(HOST2GUC_SELF_CFG_REQUEST_MSG_1_KLV_LEN, len),
+		FIELD_PREP(HOST2GUC_SELF_CFG_REQUEST_MSG_2_VALUE32, lower_32_bits(value)),
+		FIELD_PREP(HOST2GUC_SELF_CFG_REQUEST_MSG_3_VALUE64, upper_32_bits(value)),
+	};
+	int ret;
+
+	GEM_BUG_ON(len > 2);
+	GEM_BUG_ON(len == 1 && upper_32_bits(value));
+
+	/* Self config must go over MMIO */
+	ret = intel_guc_send_mmio(guc, request, ARRAY_SIZE(request), NULL, 0);
+
+	if (unlikely(ret < 0))
+		return ret;
+	if (unlikely(ret > 1))
+		return -EPROTO;
+	if (unlikely(!ret))
+		return -ENOKEY;
+
+	return 0;
+}
+
+static int __guc_self_cfg(struct intel_guc *guc, u16 key, u16 len, u64 value)
+{
+	struct drm_i915_private *i915 = guc_to_gt(guc)->i915;
+	int err = __guc_action_self_cfg(guc, key, len, value);
+
+	if (unlikely(err))
+		i915_probe_error(i915, "Unsuccessful self-config (%pe) key %#hx value %#llx\n",
+				 ERR_PTR(err), key, value);
+	return err;
+}
+
+int intel_guc_self_cfg32(struct intel_guc *guc, u16 key, u32 value)
+{
+	return __guc_self_cfg(guc, key, 1, value);
+}
+
+int intel_guc_self_cfg64(struct intel_guc *guc, u16 key, u64 value)
+{
+	return __guc_self_cfg(guc, key, 2, value);
+}
+
 /**
  * intel_guc_load_status - dump information about GuC load status
  * @guc: the GuC
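
A hedged usage sketch of the new self-config helpers; it mirrors how intel_guc_ct.c registers its buffers later in this patch, but the wrapper and its argument here are illustrative:

static int example_cfg_h2g_addr(struct intel_guc *guc, u64 ctb_ggtt_addr)
{
	int err;

	err = intel_guc_self_cfg64(guc, GUC_KLV_SELF_CFG_H2G_CTB_ADDR_KEY,
				   ctb_ggtt_addr);
	if (unlikely(err))
		return err; /* -ENOKEY if firmware did not recognize the key */

	return 0;
}
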
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
index 3aabe164c3291a47fb2dc3e38ce11828c03d7829..9d779de16613f61514a1fc3801a0d2abed1d61e5 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
@@ -119,6 +119,15 @@ struct intel_guc {
 		 * function as it might be in an atomic context (no sleeping)
 		 */
 		struct work_struct destroyed_worker;
+		/**
+		 * @reset_fail_worker: worker to trigger a GT reset after an
+		 * engine reset fails
+		 */
+		struct work_struct reset_fail_worker;
+		/**
+		 * @reset_fail_mask: mask of engines that failed to reset
+		 */
+		intel_engine_mask_t reset_fail_mask;
 	} submission_state;
 
 	/**
@@ -141,6 +150,13 @@ struct intel_guc {
 	struct __guc_ads_blob *ads_blob;
 	/** @ads_regset_size: size of the save/restore regsets in the ADS */
 	u32 ads_regset_size;
+	/**
+	 * @ads_regset_count: number of save/restore registers in the ADS for
+	 * each engine
+	 */
+	u32 ads_regset_count[I915_NUM_ENGINES];
+	/** @ads_regset: save/restore regsets in the ADS */
+	struct guc_mmio_reg *ads_regset;
 	/** @ads_golden_ctxt_size: size of the golden contexts in the ADS */
 	u32 ads_golden_ctxt_size;
 	/** @ads_engine_usage_size: size of engine usage in the ADS */
@@ -333,6 +349,8 @@ int intel_guc_resume(struct intel_guc *guc);
 struct i915_vma *intel_guc_allocate_vma(struct intel_guc *guc, u32 size);
 int intel_guc_allocate_and_map_vma(struct intel_guc *guc, u32 size,
 				   struct i915_vma **out_vma, void **out_vaddr);
+int intel_guc_self_cfg32(struct intel_guc *guc, u16 key, u32 value);
+int intel_guc_self_cfg64(struct intel_guc *guc, u16 key, u64 value);
 
 static inline bool intel_guc_is_supported(struct intel_guc *guc)
 {
@@ -409,6 +427,8 @@ int intel_guc_context_reset_process_msg(struct intel_guc *guc,
 					const u32 *msg, u32 len);
 int intel_guc_engine_failure_process_msg(struct intel_guc *guc,
 					 const u32 *msg, u32 len);
+int intel_guc_error_capture_process_msg(struct intel_guc *guc,
+					const u32 *msg, u32 len);
 
 void intel_guc_find_hung_context(struct intel_engine_cs *engine);
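
A hedged sketch of the deferral pattern the new reset_fail fields support: the G2H notification handler cannot perform a heavyweight GT reset in its own context, so it records the failing engine and kicks a worker. Locking and the worker body are elided, and this is not the patch's own handler:

static void example_engine_reset_failed(struct intel_guc *guc,
					struct intel_engine_cs *engine)
{
	/* normally done under submission_state.lock */
	guc->submission_state.reset_fail_mask |= engine->mask;
	queue_work(system_unbound_wq,
		   &guc->submission_state.reset_fail_worker);
}
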
 
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
index 162b891985673f08ff76081bf53cdbe5ba4eae40..7e41175618f587ff4bf48a453ab6c06fb445a274 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
@@ -42,6 +42,10 @@
  *      +---------------------------------------+
  *      | padding                               |
  *      +---------------------------------------+ <== 4K aligned
+ *      | capture lists                         |
+ *      +---------------------------------------+
+ *      | padding                               |
+ *      +---------------------------------------+ <== 4K aligned
  *      | private data                          |
  *      +---------------------------------------+
  *      | padding                               |
@@ -67,6 +71,12 @@ static u32 guc_ads_golden_ctxt_size(struct intel_guc *guc)
 	return PAGE_ALIGN(guc->ads_golden_ctxt_size);
 }
 
+static u32 guc_ads_capture_size(struct intel_guc *guc)
+{
+	/* FIXME: Allocate a proper capture list */
+	return PAGE_ALIGN(PAGE_SIZE);
+}
+
 static u32 guc_ads_private_data_size(struct intel_guc *guc)
 {
 	return PAGE_ALIGN(guc->fw.private_data_size);
@@ -87,7 +97,7 @@ static u32 guc_ads_golden_ctxt_offset(struct intel_guc *guc)
 	return PAGE_ALIGN(offset);
 }
 
-static u32 guc_ads_private_data_offset(struct intel_guc *guc)
+static u32 guc_ads_capture_offset(struct intel_guc *guc)
 {
 	u32 offset;
 
@@ -97,6 +107,16 @@ static u32 guc_ads_private_data_offset(struct intel_guc *guc)
 	return PAGE_ALIGN(offset);
 }
 
+static u32 guc_ads_private_data_offset(struct intel_guc *guc)
+{
+	u32 offset;
+
+	offset = guc_ads_capture_offset(guc) +
+		 guc_ads_capture_size(guc);
+
+	return PAGE_ALIGN(offset);
+}
+
 static u32 guc_ads_blob_size(struct intel_guc *guc)
 {
 	return guc_ads_private_data_offset(guc) +
@@ -188,14 +208,18 @@ static void guc_mapping_table_init(struct intel_gt *gt,
 
 /*
  * The save/restore register list must be pre-calculated to a temporary
- * buffer of driver defined size before it can be generated in place
- * inside the ADS.
+ * buffer before it can be copied inside the ADS.
  */
-#define MAX_MMIO_REGS	128	/* Arbitrary size, increase as needed */
 struct temp_regset {
+	/*
+	 * Pointer to the section of storage for the engine currently
+	 * being worked on.
+	 */
 	struct guc_mmio_reg *registers;
-	u32 used;
-	u32 size;
+	/* ptr to the base of the allocated storage for all engines */
+	struct guc_mmio_reg *storage;
+	u32 storage_used;
+	u32 storage_max;
 };
 
 static int guc_mmio_reg_cmp(const void *a, const void *b)
@@ -206,18 +230,44 @@ static int guc_mmio_reg_cmp(const void *a, const void *b)
 	return (int)ra->offset - (int)rb->offset;
 }
 
-static void guc_mmio_reg_add(struct temp_regset *regset,
-			     u32 offset, u32 flags)
+static struct guc_mmio_reg * __must_check
+__mmio_reg_add(struct temp_regset *regset, struct guc_mmio_reg *reg)
 {
-	u32 count = regset->used;
+	u32 pos = regset->storage_used;
+	struct guc_mmio_reg *slot;
+
+	if (pos >= regset->storage_max) {
+		size_t size = ALIGN((pos + 1) * sizeof(*slot), PAGE_SIZE);
+		struct guc_mmio_reg *r = krealloc(regset->storage,
+						  size, GFP_KERNEL);
+		if (!r) {
+			WARN_ONCE(1, "Incomplete regset list: can't add register (%d)\n",
+				  -ENOMEM);
+			return ERR_PTR(-ENOMEM);
+		}
+
+		regset->registers = r + (regset->registers - regset->storage);
+		regset->storage = r;
+		regset->storage_max = size / sizeof(*slot);
+	}
+
+	slot = &regset->storage[pos];
+	regset->storage_used++;
+	*slot = *reg;
+
+	return slot;
+}
+
+static long __must_check guc_mmio_reg_add(struct temp_regset *regset,
+					  u32 offset, u32 flags)
+{
+	u32 count = regset->storage_used - (regset->registers - regset->storage);
 	struct guc_mmio_reg reg = {
 		.offset = offset,
 		.flags = flags,
 	};
 	struct guc_mmio_reg *slot;
 
-	GEM_BUG_ON(count >= regset->size);
-
 	/*
 	 * The mmio list is built using separate lists within the driver.
 	 * It's possible that at some point we may attempt to add the same
@@ -226,11 +276,11 @@ static void guc_mmio_reg_add(struct temp_regset *regset,
 	 */
 	if (bsearch(&reg, regset->registers, count,
 		    sizeof(reg), guc_mmio_reg_cmp))
-		return;
+		return 0;
 
-	slot = &regset->registers[count];
-	regset->used++;
-	*slot = reg;
+	slot = __mmio_reg_add(regset, &reg);
+	if (IS_ERR(slot))
+		return PTR_ERR(slot);
 
 	while (slot-- > regset->registers) {
 		GEM_BUG_ON(slot[0].offset == slot[1].offset);
@@ -239,6 +289,8 @@ static void guc_mmio_reg_add(struct temp_regset *regset,
 
 		swap(slot[1], slot[0]);
 	}
+
+	return 0;
 }
 
 #define GUC_MMIO_REG_ADD(regset, reg, masked) \
@@ -246,62 +298,71 @@ static void guc_mmio_reg_add(struct temp_regset *regset,
 			 i915_mmio_reg_offset((reg)), \
 			 (masked) ? GUC_REGSET_MASKED : 0)
 
-static void guc_mmio_regset_init(struct temp_regset *regset,
-				 struct intel_engine_cs *engine)
+static int guc_mmio_regset_init(struct temp_regset *regset,
+				struct intel_engine_cs *engine)
 {
 	const u32 base = engine->mmio_base;
 	struct i915_wa_list *wal = &engine->wa_list;
 	struct i915_wa *wa;
 	unsigned int i;
+	int ret = 0;
 
-	regset->used = 0;
+	/*
+	 * Each engine's register list starts at the current end of the
+	 * shared storage buffer.
+	 */
+	regset->registers = regset->storage + regset->storage_used;
 
-	GUC_MMIO_REG_ADD(regset, RING_MODE_GEN7(base), true);
-	GUC_MMIO_REG_ADD(regset, RING_HWS_PGA(base), false);
-	GUC_MMIO_REG_ADD(regset, RING_IMR(base), false);
+	ret |= GUC_MMIO_REG_ADD(regset, RING_MODE_GEN7(base), true);
+	ret |= GUC_MMIO_REG_ADD(regset, RING_HWS_PGA(base), false);
+	ret |= GUC_MMIO_REG_ADD(regset, RING_IMR(base), false);
 
 	for (i = 0, wa = wal->list; i < wal->count; i++, wa++)
-		GUC_MMIO_REG_ADD(regset, wa->reg, wa->masked_reg);
+		ret |= GUC_MMIO_REG_ADD(regset, wa->reg, wa->masked_reg);
 
 	/* Be extra paranoid and include all whitelist registers. */
 	for (i = 0; i < RING_MAX_NONPRIV_SLOTS; i++)
-		GUC_MMIO_REG_ADD(regset,
-				 RING_FORCE_TO_NONPRIV(base, i),
-				 false);
+		ret |= GUC_MMIO_REG_ADD(regset,
+					RING_FORCE_TO_NONPRIV(base, i),
+					false);
 
 	/* add in local MOCS registers */
 	for (i = 0; i < GEN9_LNCFCMOCS_REG_COUNT; i++)
-		GUC_MMIO_REG_ADD(regset, GEN9_LNCFCMOCS(i), false);
+		ret |= GUC_MMIO_REG_ADD(regset, GEN9_LNCFCMOCS(i), false);
+
+	return ret ? -1 : 0;
 }
 
-static int guc_mmio_reg_state_query(struct intel_guc *guc)
+static long guc_mmio_reg_state_create(struct intel_guc *guc)
 {
 	struct intel_gt *gt = guc_to_gt(guc);
 	struct intel_engine_cs *engine;
 	enum intel_engine_id id;
-	struct temp_regset temp_set;
-	u32 total;
+	struct temp_regset temp_set = {};
+	long total = 0;
+	long ret;
 
-	/*
-	 * Need to actually build the list in order to filter out
-	 * duplicates and other such data dependent constructions.
-	 */
-	temp_set.size = MAX_MMIO_REGS;
-	temp_set.registers = kmalloc_array(temp_set.size,
-					   sizeof(*temp_set.registers),
-					   GFP_KERNEL);
-	if (!temp_set.registers)
-		return -ENOMEM;
-
-	total = 0;
 	for_each_engine(engine, gt, id) {
-		guc_mmio_regset_init(&temp_set, engine);
-		total += temp_set.used;
+		u32 used = temp_set.storage_used;
+
+		ret = guc_mmio_regset_init(&temp_set, engine);
+		if (ret < 0)
+			goto fail_regset_init;
+
+		guc->ads_regset_count[id] = temp_set.storage_used - used;
+		total += guc->ads_regset_count[id];
 	}
 
-	kfree(temp_set.registers);
+	guc->ads_regset = temp_set.storage;
+
+	drm_dbg(&guc_to_gt(guc)->i915->drm, "Used %zu KB for temporary ADS regset\n",
+		(temp_set.storage_max * sizeof(struct guc_mmio_reg)) >> 10);
 
 	return total * sizeof(struct guc_mmio_reg);
+
+fail_regset_init:
+	kfree(temp_set.storage);
+	return ret;
 }
 
 static void guc_mmio_reg_state_init(struct intel_guc *guc,
@@ -309,40 +370,38 @@ static void guc_mmio_reg_state_init(struct intel_guc *guc,
 {
 	struct intel_gt *gt = guc_to_gt(guc);
 	struct intel_engine_cs *engine;
+	struct guc_mmio_reg *ads_registers;
 	enum intel_engine_id id;
-	struct temp_regset temp_set;
-	struct guc_mmio_reg_set *ads_reg_set;
 	u32 addr_ggtt, offset;
-	u8 guc_class;
 
 	offset = guc_ads_regset_offset(guc);
 	addr_ggtt = intel_guc_ggtt_offset(guc, guc->ads_vma) + offset;
-	temp_set.registers = (struct guc_mmio_reg *)(((u8 *)blob) + offset);
-	temp_set.size = guc->ads_regset_size / sizeof(temp_set.registers[0]);
+	ads_registers = (struct guc_mmio_reg *)(((u8 *)blob) + offset);
+
+	memcpy(ads_registers, guc->ads_regset, guc->ads_regset_size);
 
 	for_each_engine(engine, gt, id) {
+		u32 count = guc->ads_regset_count[id];
+		struct guc_mmio_reg_set *ads_reg_set;
+		u8 guc_class;
+
 		/* Class index is checked in class converter */
 		GEM_BUG_ON(engine->instance >= GUC_MAX_INSTANCES_PER_CLASS);
 
 		guc_class = engine_class_to_guc_class(engine->class);
 		ads_reg_set = &blob->ads.reg_state_list[guc_class][engine->instance];
 
-		guc_mmio_regset_init(&temp_set, engine);
-		if (!temp_set.used) {
+		if (!count) {
 			ads_reg_set->address = 0;
 			ads_reg_set->count = 0;
 			continue;
 		}
 
 		ads_reg_set->address = addr_ggtt;
-		ads_reg_set->count = temp_set.used;
+		ads_reg_set->count = count;
 
-		temp_set.size -= temp_set.used;
-		temp_set.registers += temp_set.used;
-		addr_ggtt += temp_set.used * sizeof(struct guc_mmio_reg);
+		addr_ggtt += count * sizeof(struct guc_mmio_reg);
 	}
-
-	GEM_BUG_ON(temp_set.size);
 }
 
 static void fill_engine_enable_masks(struct intel_gt *gt,
@@ -501,6 +560,26 @@ static void guc_init_golden_context(struct intel_guc *guc)
 	GEM_BUG_ON(guc->ads_golden_ctxt_size != total_size);
 }
 
+static void guc_capture_list_init(struct intel_guc *guc, struct __guc_ads_blob *blob)
+{
+	int i, j;
+	u32 addr_ggtt, offset;
+
+	offset = guc_ads_capture_offset(guc);
+	addr_ggtt = intel_guc_ggtt_offset(guc, guc->ads_vma) + offset;
+
+	/* FIXME: Populate a proper capture list */
+
+	for (i = 0; i < GUC_CAPTURE_LIST_INDEX_MAX; i++) {
+		for (j = 0; j < GUC_MAX_ENGINE_CLASSES; j++) {
+			blob->ads.capture_instance[i][j] = addr_ggtt;
+			blob->ads.capture_class[i][j] = addr_ggtt;
+		}
+
+		blob->ads.capture_global[i] = addr_ggtt;
+	}
+}
+
 static void __guc_ads_init(struct intel_guc *guc)
 {
 	struct intel_gt *gt = guc_to_gt(guc);
@@ -534,6 +613,9 @@ static void __guc_ads_init(struct intel_guc *guc)
 
 	base = intel_guc_ggtt_offset(guc, guc->ads_vma);
 
+	/* Capture list for hang debug */
+	guc_capture_list_init(guc, blob);
+
 	/* ADS */
 	blob->ads.scheduler_policies = base + ptr_offset(blob, policies);
 	blob->ads.gt_system_info = base + ptr_offset(blob, system_info);
@@ -561,8 +643,11 @@ int intel_guc_ads_create(struct intel_guc *guc)
 
 	GEM_BUG_ON(guc->ads_vma);
 
-	/* Need to calculate the reg state size dynamically: */
-	ret = guc_mmio_reg_state_query(guc);
+	/*
+	 * Create reg state size dynamically on system memory to be copied to
+	 * the final ads blob on gt init/reset
+	 */
+	ret = guc_mmio_reg_state_create(guc);
 	if (ret < 0)
 		return ret;
 	guc->ads_regset_size = ret;
@@ -602,6 +687,7 @@ void intel_guc_ads_destroy(struct intel_guc *guc)
 {
 	i915_vma_unpin_and_release(&guc->ads_vma, I915_VMA_RELEASE_MAP);
 	guc->ads_blob = NULL;
+	kfree(guc->ads_regset);
 }
 
 static void guc_ads_private_data_reset(struct intel_guc *guc)
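
The krealloc path in __mmio_reg_add() above has one subtle step: when the allocation moves, the interior "registers" window pointer must be rebased against the new block. A standalone sketch of the same pattern:

#include <stdio.h>
#include <stdlib.h>

struct regset {
	int *storage;	/* base of the (re)allocated block */
	int *registers;	/* per-"engine" window into storage */
	size_t used, max;
};

static int regset_add(struct regset *rs, int val)
{
	if (rs->used == rs->max) {
		size_t max = rs->max ? 2 * rs->max : 16;
		int *r = realloc(rs->storage, max * sizeof(*r));

		if (!r)
			return -1;
		/* realloc may move the block: rebase the window pointer */
		rs->registers = r + (rs->registers - rs->storage);
		rs->storage = r;
		rs->max = max;
	}
	rs->storage[rs->used++] = val;
	return 0;
}

int main(void)
{
	/* both pointers start NULL, like the zero-initialized temp_regset */
	struct regset rs = { 0 };

	for (int i = 0; i < 100; i++)
		if (regset_add(&rs, i))
			return 1;
	printf("%zu entries, window offset %td\n", rs.used,
	       rs.registers - rs.storage);
	free(rs.storage);
	return 0;
}
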
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
index aa6dd6415202a1ebd0e81546abf49898938a863a..2f7fc87a78e1b78046baf665154a2ecbdcfe050e 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
@@ -112,18 +112,6 @@ void intel_guc_ct_init_early(struct intel_guc_ct *ct)
 	init_waitqueue_head(&ct->wq);
 }
 
-static inline const char *guc_ct_buffer_type_to_str(u32 type)
-{
-	switch (type) {
-	case GUC_CTB_TYPE_HOST2GUC:
-		return "SEND";
-	case GUC_CTB_TYPE_GUC2HOST:
-		return "RECV";
-	default:
-		return "<invalid>";
-	}
-}
-
 static void guc_ct_buffer_desc_init(struct guc_ct_buffer_desc *desc)
 {
 	memset(desc, 0, sizeof(*desc));
@@ -156,71 +144,65 @@ static void guc_ct_buffer_init(struct intel_guc_ct_buffer *ctb,
 	guc_ct_buffer_reset(ctb);
 }
 
-static int guc_action_register_ct_buffer(struct intel_guc *guc, u32 type,
-					 u32 desc_addr, u32 buff_addr, u32 size)
+static int guc_action_control_ctb(struct intel_guc *guc, u32 control)
 {
-	u32 request[HOST2GUC_REGISTER_CTB_REQUEST_MSG_LEN] = {
+	u32 request[HOST2GUC_CONTROL_CTB_REQUEST_MSG_LEN] = {
 		FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) |
 		FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) |
-		FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION, GUC_ACTION_HOST2GUC_REGISTER_CTB),
-		FIELD_PREP(HOST2GUC_REGISTER_CTB_REQUEST_MSG_1_SIZE, size / SZ_4K - 1) |
-		FIELD_PREP(HOST2GUC_REGISTER_CTB_REQUEST_MSG_1_TYPE, type),
-		FIELD_PREP(HOST2GUC_REGISTER_CTB_REQUEST_MSG_2_DESC_ADDR, desc_addr),
-		FIELD_PREP(HOST2GUC_REGISTER_CTB_REQUEST_MSG_3_BUFF_ADDR, buff_addr),
+		FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION, GUC_ACTION_HOST2GUC_CONTROL_CTB),
+		FIELD_PREP(HOST2GUC_CONTROL_CTB_REQUEST_MSG_1_CONTROL, control),
 	};
 	int ret;
 
-	GEM_BUG_ON(type != GUC_CTB_TYPE_HOST2GUC && type != GUC_CTB_TYPE_GUC2HOST);
-	GEM_BUG_ON(size % SZ_4K);
+	GEM_BUG_ON(control != GUC_CTB_CONTROL_DISABLE && control != GUC_CTB_CONTROL_ENABLE);
 
-	/* CT registration must go over MMIO */
+	/* CT control must go over MMIO */
 	ret = intel_guc_send_mmio(guc, request, ARRAY_SIZE(request), NULL, 0);
 
 	return ret > 0 ? -EPROTO : ret;
 }
 
-static int ct_register_buffer(struct intel_guc_ct *ct, u32 type,
-			      u32 desc_addr, u32 buff_addr, u32 size)
+static int ct_control_enable(struct intel_guc_ct *ct, bool enable)
 {
 	int err;
 
-	err = i915_inject_probe_error(guc_to_gt(ct_to_guc(ct))->i915, -ENXIO);
+	err = guc_action_control_ctb(ct_to_guc(ct), enable ?
+				     GUC_CTB_CONTROL_ENABLE : GUC_CTB_CONTROL_DISABLE);
 	if (unlikely(err))
-		return err;
+		CT_PROBE_ERROR(ct, "Failed to control/%s CTB (%pe)\n",
+			       enabledisable(enable), ERR_PTR(err));
 
-	err = guc_action_register_ct_buffer(ct_to_guc(ct), type,
-					    desc_addr, buff_addr, size);
-	if (unlikely(err))
-		CT_ERROR(ct, "Failed to register %s buffer (%pe)\n",
-			 guc_ct_buffer_type_to_str(type), ERR_PTR(err));
 	return err;
 }
 
-static int guc_action_deregister_ct_buffer(struct intel_guc *guc, u32 type)
+static int ct_register_buffer(struct intel_guc_ct *ct, bool send,
+			      u32 desc_addr, u32 buff_addr, u32 size)
 {
-	u32 request[HOST2GUC_DEREGISTER_CTB_REQUEST_MSG_LEN] = {
-		FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) |
-		FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) |
-		FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION, GUC_ACTION_HOST2GUC_DEREGISTER_CTB),
-		FIELD_PREP(HOST2GUC_DEREGISTER_CTB_REQUEST_MSG_1_TYPE, type),
-	};
-	int ret;
-
-	GEM_BUG_ON(type != GUC_CTB_TYPE_HOST2GUC && type != GUC_CTB_TYPE_GUC2HOST);
-
-	/* CT deregistration must go over MMIO */
-	ret = intel_guc_send_mmio(guc, request, ARRAY_SIZE(request), NULL, 0);
+	int err;
 
-	return ret > 0 ? -EPROTO : ret;
-}
+	err = intel_guc_self_cfg64(ct_to_guc(ct), send ?
+				   GUC_KLV_SELF_CFG_H2G_CTB_DESCRIPTOR_ADDR_KEY :
+				   GUC_KLV_SELF_CFG_G2H_CTB_DESCRIPTOR_ADDR_KEY,
+				   desc_addr);
+	if (unlikely(err))
+		goto failed;
 
-static int ct_deregister_buffer(struct intel_guc_ct *ct, u32 type)
-{
-	int err = guc_action_deregister_ct_buffer(ct_to_guc(ct), type);
+	err = intel_guc_self_cfg64(ct_to_guc(ct), send ?
+				   GUC_KLV_SELF_CFG_H2G_CTB_ADDR_KEY :
+				   GUC_KLV_SELF_CFG_G2H_CTB_ADDR_KEY,
+				   buff_addr);
+	if (unlikely(err))
+		goto failed;
 
+	err = intel_guc_self_cfg32(ct_to_guc(ct), send ?
+				   GUC_KLV_SELF_CFG_H2G_CTB_SIZE_KEY :
+				   GUC_KLV_SELF_CFG_G2H_CTB_SIZE_KEY,
+				   size);
 	if (unlikely(err))
-		CT_ERROR(ct, "Failed to deregister %s buffer (%pe)\n",
-			 guc_ct_buffer_type_to_str(type), ERR_PTR(err));
+failed:
+		CT_PROBE_ERROR(ct, "Failed to register %s buffer (%pe)\n",
+			       send ? "SEND" : "RECV", ERR_PTR(err));
+
 	return err;
 }
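
[Editor's note] Buffer registration itself now goes through generic KLV (key/length/value) self-config items instead of a dedicated H2G action. A hedged sketch of the assumed encoding — 16-bit key and 16-bit dword count in the header, with 64-bit values split across two dwords; the key value here is hypothetical:

#include <stdint.h>
#include <stdio.h>

/* emit one KLV item carrying a 64-bit value; returns dwords consumed */
static int klv_emit64(uint32_t *buf, uint16_t key, uint64_t value)
{
	buf[0] = ((uint32_t)key << 16) | 2;	/* header: key + len=2 dwords */
	buf[1] = (uint32_t)value;		/* low dword */
	buf[2] = (uint32_t)(value >> 32);	/* high dword */
	return 3;
}

int main(void)
{
	uint32_t klv[3];
	int n = klv_emit64(klv, 0x0902 /* hypothetical desc-addr key */,
			   0x0000000100004000ull);

	for (int i = 0; i < n; i++)
		printf("klv[%d] = %#010x\n", i, klv[i]);
	return 0;
}
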
 
@@ -308,7 +290,7 @@ void intel_guc_ct_fini(struct intel_guc_ct *ct)
 int intel_guc_ct_enable(struct intel_guc_ct *ct)
 {
 	struct intel_guc *guc = ct_to_guc(ct);
-	u32 base, desc, cmds;
+	u32 base, desc, cmds, size;
 	void *blob;
 	int err;
 
@@ -333,27 +315,27 @@ int intel_guc_ct_enable(struct intel_guc_ct *ct)
 	 */
 	desc = base + ptrdiff(ct->ctbs.recv.desc, blob);
 	cmds = base + ptrdiff(ct->ctbs.recv.cmds, blob);
-	err = ct_register_buffer(ct, GUC_CTB_TYPE_GUC2HOST,
-				 desc, cmds, ct->ctbs.recv.size * 4);
-
+	size = ct->ctbs.recv.size * 4;
+	err = ct_register_buffer(ct, false, desc, cmds, size);
 	if (unlikely(err))
 		goto err_out;
 
 	desc = base + ptrdiff(ct->ctbs.send.desc, blob);
 	cmds = base + ptrdiff(ct->ctbs.send.cmds, blob);
-	err = ct_register_buffer(ct, GUC_CTB_TYPE_HOST2GUC,
-				 desc, cmds, ct->ctbs.send.size * 4);
+	size = ct->ctbs.send.size * 4;
+	err = ct_register_buffer(ct, true, desc, cmds, size);
+	if (unlikely(err))
+		goto err_out;
 
+	err = ct_control_enable(ct, true);
 	if (unlikely(err))
-		goto err_deregister;
+		goto err_out;
 
 	ct->enabled = true;
 	ct->stall_time = KTIME_MAX;
 
 	return 0;
 
-err_deregister:
-	ct_deregister_buffer(ct, GUC_CTB_TYPE_GUC2HOST);
 err_out:
 	CT_PROBE_ERROR(ct, "Failed to enable CTB (%pe)\n", ERR_PTR(err));
 	return err;
@@ -372,8 +354,7 @@ void intel_guc_ct_disable(struct intel_guc_ct *ct)
 	ct->enabled = false;
 
 	if (intel_guc_is_fw_running(guc)) {
-		ct_deregister_buffer(ct, GUC_CTB_TYPE_HOST2GUC);
-		ct_deregister_buffer(ct, GUC_CTB_TYPE_GUC2HOST);
+		ct_control_enable(ct, false);
 	}
 }
 
@@ -662,6 +643,7 @@ static int ct_send(struct intel_guc_ct *ct,
 	struct ct_request request;
 	unsigned long flags;
 	unsigned int sleep_period_ms = 1;
+	bool send_again;
 	u32 fence;
 	int err;
 
@@ -671,6 +653,9 @@ static int ct_send(struct intel_guc_ct *ct,
 	GEM_BUG_ON(!response_buf && response_buf_size);
 	might_sleep();
 
+resend:
+	send_again = false;
+
 	/*
 	 * We use a lazy spin wait loop here as we believe that if the CT
 	 * buffers are sized correctly the flow control condition should be
@@ -725,6 +710,13 @@ static int ct_send(struct intel_guc_ct *ct,
 		goto unlink;
 	}
 
+	if (FIELD_GET(GUC_HXG_MSG_0_TYPE, *status) == GUC_HXG_TYPE_NO_RESPONSE_RETRY) {
+		CT_DEBUG(ct, "retrying request %#x (%u)\n", *action,
+			 FIELD_GET(GUC_HXG_RETRY_MSG_0_REASON, *status));
+		send_again = true;
+		goto unlink;
+	}
+
 	if (FIELD_GET(GUC_HXG_MSG_0_TYPE, *status) != GUC_HXG_TYPE_RESPONSE_SUCCESS) {
 		err = -EIO;
 		goto unlink;
@@ -747,6 +739,9 @@ static int ct_send(struct intel_guc_ct *ct,
 	list_del(&request.link);
 	spin_unlock_irqrestore(&ct->requests.lock, flags);
 
+	if (unlikely(send_again))
+		goto resend;
+
 	return err;
 }
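
[Editor's note] The resend label lets ct_send() rebuild and resubmit the whole request when GuC answers with NO_RESPONSE_RETRY. Reduced to a self-contained shape, where send_once() stands in for the fence allocation + H2G write + wait sequence — note there is deliberately no retry cap, matching the code above:

#include <stdio.h>

enum send_status { SEND_OK, SEND_ERR, SEND_RETRY };

static enum send_status send_once(int attempt)
{
	/* pretend the first two attempts are bounced back by the target */
	return attempt < 2 ? SEND_RETRY : SEND_OK;
}

static int send_with_retry(void)
{
	int attempt = 0;
	enum send_status st;

resend:
	st = send_once(attempt++);
	if (st == SEND_RETRY)
		goto resend;	/* rebuild and resubmit the request */

	return st == SEND_OK ? 0 : -1;
}

int main(void)
{
	printf("send_with_retry() = %d\n", send_with_retry());
	return 0;
}
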
 
@@ -789,7 +784,7 @@ static struct ct_incoming_msg *ct_alloc_msg(u32 num_dwords)
 {
 	struct ct_incoming_msg *msg;
 
-	msg = kmalloc(sizeof(*msg) + sizeof(u32) * num_dwords, GFP_ATOMIC);
+	msg = kmalloc(struct_size(msg, msg, num_dwords), GFP_ATOMIC);
 	if (msg)
 		msg->size = num_dwords;
 	return msg;
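
[Editor's note] struct_size() computes sizeof(*msg) + num_dwords * sizeof(u32) with saturation on overflow, so an externally influenced count cannot wrap the allocation size. A userspace approximation of what the helper guards against:

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct incoming_msg {
	uint32_t size;
	uint32_t msg[];		/* flexible array of dwords */
};

/* overflow-checked hdr + elem * count, saturating like struct_size() */
static size_t flex_size(size_t hdr, size_t elem, size_t count)
{
	size_t bytes;

	if (__builtin_mul_overflow(elem, count, &bytes) ||
	    __builtin_add_overflow(bytes, hdr, &bytes))
		return SIZE_MAX;	/* saturate; malloc will fail */

	return bytes;
}

int main(void)
{
	size_t sz = flex_size(sizeof(struct incoming_msg),
			      sizeof(uint32_t), 8);
	struct incoming_msg *m = malloc(sz);

	if (m) {
		m->size = 8;
		memset(m->msg, 0, 8 * sizeof(uint32_t));
		printf("allocated %zu bytes for %u dwords\n", sz, m->size);
		free(m);
	}
	return 0;
}
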
@@ -918,6 +913,7 @@ static int ct_handle_response(struct intel_guc_ct *ct, struct ct_incoming_msg *r
 	GEM_BUG_ON(len < GUC_HXG_MSG_MIN_LEN);
 	GEM_BUG_ON(FIELD_GET(GUC_HXG_MSG_0_ORIGIN, hxg[0]) != GUC_HXG_ORIGIN_GUC);
 	GEM_BUG_ON(FIELD_GET(GUC_HXG_MSG_0_TYPE, hxg[0]) != GUC_HXG_TYPE_RESPONSE_SUCCESS &&
+		   FIELD_GET(GUC_HXG_MSG_0_TYPE, hxg[0]) != GUC_HXG_TYPE_NO_RESPONSE_RETRY &&
 		   FIELD_GET(GUC_HXG_MSG_0_TYPE, hxg[0]) != GUC_HXG_TYPE_RESPONSE_FAILURE);
 
 	CT_DEBUG(ct, "response fence %u status %#x\n", fence, hxg[0]);
@@ -990,9 +986,27 @@ static int ct_process_request(struct intel_guc_ct *ct, struct ct_incoming_msg *r
 	case INTEL_GUC_ACTION_CONTEXT_RESET_NOTIFICATION:
 		ret = intel_guc_context_reset_process_msg(guc, payload, len);
 		break;
+	case INTEL_GUC_ACTION_STATE_CAPTURE_NOTIFICATION:
+		ret = intel_guc_error_capture_process_msg(guc, payload, len);
+		if (unlikely(ret))
+			CT_ERROR(ct, "error capture notification failed %x %*ph\n",
+				 action, 4 * len, payload);
+		break;
 	case INTEL_GUC_ACTION_ENGINE_FAILURE_NOTIFICATION:
 		ret = intel_guc_engine_failure_process_msg(guc, payload, len);
 		break;
+	case INTEL_GUC_ACTION_NOTIFY_FLUSH_LOG_BUFFER_TO_FILE:
+		intel_guc_log_handle_flush_event(&guc->log);
+		ret = 0;
+		break;
+	case INTEL_GUC_ACTION_NOTIFY_CRASH_DUMP_POSTED:
+		CT_ERROR(ct, "Received GuC crash dump notification!\n");
+		ret = 0;
+		break;
+	case INTEL_GUC_ACTION_NOTIFY_EXCEPTION:
+		CT_ERROR(ct, "Received GuC exception notification!\n");
+		ret = 0;
+		break;
 	default:
 		ret = -EOPNOTSUPP;
 		break;
@@ -1098,6 +1112,7 @@ static int ct_handle_hxg(struct intel_guc_ct *ct, struct ct_incoming_msg *msg)
 		break;
 	case GUC_HXG_TYPE_RESPONSE_SUCCESS:
 	case GUC_HXG_TYPE_RESPONSE_FAILURE:
+	case GUC_HXG_TYPE_NO_RESPONSE_RETRY:
 		err = ct_handle_response(ct, msg);
 		break;
 	default:
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c
index dcb51b53b49544f71ef7fe02126009a865404df5..a0372735cddb1d7b84e89762ff3a100182ee0ed6 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c
@@ -16,13 +16,15 @@
 
 static void guc_prepare_xfer(struct intel_uncore *uncore)
 {
-	u32 shim_flags = GUC_DISABLE_SRAM_INIT_TO_ZEROES |
-			 GUC_ENABLE_READ_CACHE_LOGIC |
-			 GUC_ENABLE_MIA_CACHING |
+	u32 shim_flags = GUC_ENABLE_READ_CACHE_LOGIC |
 			 GUC_ENABLE_READ_CACHE_FOR_SRAM_DATA |
 			 GUC_ENABLE_READ_CACHE_FOR_WOPCM_DATA |
 			 GUC_ENABLE_MIA_CLOCK_GATING;
 
+	if (GRAPHICS_VER_FULL(uncore->i915) < IP_VER(12, 50))
+		shim_flags |= GUC_DISABLE_SRAM_INIT_TO_ZEROES |
+			      GUC_ENABLE_MIA_CACHING;
+
 	/* Must program this register before loading the ucode with DMA */
 	intel_uncore_write(uncore, GUC_SHIM_CONTROL, shim_flags);
 
@@ -91,11 +93,10 @@ static int guc_xfer_rsa(struct intel_uc_fw *guc_fw,
 static inline bool guc_ready(struct intel_uncore *uncore, u32 *status)
 {
 	u32 val = intel_uncore_read(uncore, GUC_STATUS);
-	u32 uk_val = val & GS_UKERNEL_MASK;
+	u32 uk_val = REG_FIELD_GET(GS_UKERNEL_MASK, val);
 
 	*status = val;
-	return (uk_val == GS_UKERNEL_READY) ||
-		((val & GS_MIA_CORE_STATE) && (uk_val == GS_UKERNEL_LAPIC_DONE));
+	return uk_val == INTEL_GUC_LOAD_STATUS_READY;
 }
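
[Editor's note] REG_FIELD_GET() shifts the masked field down to bit 0, which is why the ready check can now compare against the plain INTEL_GUC_LOAD_STATUS_READY code rather than a pre-shifted register constant. A compact illustration; mask and values are illustrative:

#include <stdint.h>
#include <stdio.h>

/* simplified stand-in for the kernel's FIELD_GET() */
#define FIELD_GET(mask, reg) (((reg) & (mask)) >> __builtin_ctz(mask))

#define UKERNEL_MASK		(0xffu << 8)
#define LOAD_STATUS_READY	0xf0u	/* unshifted status code */

int main(void)
{
	uint32_t guc_status = 0x0001f001;	/* hypothetical register value */

	/* old style: (status & MASK) == (0xf0 << SHIFT) */
	/* new style: compare the extracted field directly */
	if (FIELD_GET(UKERNEL_MASK, guc_status) == LOAD_STATUS_READY)
		printf("GuC reports ready\n");
	return 0;
}
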
 
 static int guc_wait_ucode(struct intel_uncore *uncore)
@@ -106,17 +107,26 @@ static int guc_wait_ucode(struct intel_uncore *uncore)
 	/*
 	 * Wait for the GuC to start up.
 	 * NB: Docs recommend not using the interrupt for completion.
-	 * Measurements indicate this should take no more than 20ms, so a
+	 * Measurements indicate this should take no more than 20ms
+	 * (assuming the GT clock is at maximum frequency). So, a
 	 * timeout here indicates that the GuC has failed and is unusable.
 	 * (Higher levels of the driver may decide to reset the GuC and
 	 * attempt the ucode load again if this happens.)
+	 *
+	 * FIXME: There is a known (but exceedingly unlikely) race condition
+	 * where the asynchronous frequency management code could reduce
+	 * the GT clock while a GuC reload is in progress (during a full
+	 * GT reset). A fix is in progress but there are complex locking
+	 * issues to be resolved. In the meantime bump the timeout to
+	 * 200ms. Even at the slowest clock, this should be sufficient. And
+	 * in the working case, a larger timeout makes no difference.
 	 */
-	ret = wait_for(guc_ready(uncore, &status), 100);
+	ret = wait_for(guc_ready(uncore, &status), 200);
 	if (ret) {
 		struct drm_device *drm = &uncore->i915->drm;
 
-		drm_dbg(drm, "GuC load failed: status = 0x%08X\n", status);
-		drm_dbg(drm, "GuC load failed: status: Reset = %d, "
+		drm_info(drm, "GuC load failed: status = 0x%08X\n", status);
+		drm_info(drm, "GuC load failed: status: Reset = %d, "
 			"BootROM = 0x%02X, UKernel = 0x%02X, "
 			"MIA = 0x%02X, Auth = 0x%02X\n",
 			REG_FIELD_GET(GS_MIA_IN_RESET, status),
@@ -126,13 +136,13 @@ static int guc_wait_ucode(struct intel_uncore *uncore)
 			REG_FIELD_GET(GS_AUTH_STATUS_MASK, status));
 
 		if ((status & GS_BOOTROM_MASK) == GS_BOOTROM_RSA_FAILED) {
-			drm_dbg(drm, "GuC firmware signature verification failed\n");
+			drm_info(drm, "GuC firmware signature verification failed\n");
 			ret = -ENOEXEC;
 		}
 
-		if ((status & GS_UKERNEL_MASK) == GS_UKERNEL_EXCEPTION) {
-			drm_dbg(drm, "GuC firmware exception. EIP: %#x\n",
-				intel_uncore_read(uncore, SOFT_SCRATCH(13)));
+		if (REG_FIELD_GET(GS_UKERNEL_MASK, status) == INTEL_GUC_LOAD_STATUS_EXCEPTION) {
+			drm_info(drm, "GuC firmware exception. EIP: %#x\n",
+				 intel_uncore_read(uncore, SOFT_SCRATCH(13)));
 			ret = -ENXIO;
 		}
 	}
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h
index 7072e30e99f40a3b85f19ff924d3a9f61e2cfaeb..6a4612a852e20d63d8d43999f2c84aa9157f84a1 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h
@@ -16,6 +16,7 @@
 #include "abi/guc_errors_abi.h"
 #include "abi/guc_communication_mmio_abi.h"
 #include "abi/guc_communication_ctb_abi.h"
+#include "abi/guc_klvs_abi.h"
 #include "abi/guc_messages_abi.h"
 
 /* Payload length only i.e. don't include G2H header length */
@@ -84,19 +85,24 @@
 #define GUC_STAGE_DESC_ATTR_TERMINATED	BIT(7)
 
 #define GUC_CTL_LOG_PARAMS		0
-#define   GUC_LOG_VALID			(1 << 0)
-#define   GUC_LOG_NOTIFY_ON_HALF_FULL	(1 << 1)
-#define   GUC_LOG_ALLOC_IN_MEGABYTE	(1 << 3)
+#define   GUC_LOG_VALID			BIT(0)
+#define   GUC_LOG_NOTIFY_ON_HALF_FULL	BIT(1)
+#define   GUC_LOG_CAPTURE_ALLOC_UNITS	BIT(2)
+#define   GUC_LOG_LOG_ALLOC_UNITS	BIT(3)
 #define   GUC_LOG_CRASH_SHIFT		4
 #define   GUC_LOG_CRASH_MASK		(0x3 << GUC_LOG_CRASH_SHIFT)
 #define   GUC_LOG_DEBUG_SHIFT		6
 #define   GUC_LOG_DEBUG_MASK	        (0xF << GUC_LOG_DEBUG_SHIFT)
+#define   GUC_LOG_CAPTURE_SHIFT		10
+#define   GUC_LOG_CAPTURE_MASK	        (0x3 << GUC_LOG_CAPTURE_SHIFT)
 #define   GUC_LOG_BUF_ADDR_SHIFT	12
 
 #define GUC_CTL_WA			1
+#define   GUC_WA_POLLCS                 BIT(18)
+
 #define GUC_CTL_FEATURE			2
-#define   GUC_CTL_DISABLE_SCHEDULER	(1 << 14)
 #define   GUC_CTL_ENABLE_SLPC		BIT(2)
+#define   GUC_CTL_DISABLE_SCHEDULER	BIT(14)
 
 #define GUC_CTL_DEBUG			3
 #define   GUC_LOG_VERBOSITY_SHIFT	0
@@ -116,6 +122,8 @@
 #define   GUC_ADS_ADDR_SHIFT		1
 #define   GUC_ADS_ADDR_MASK		(0xFFFFF << GUC_ADS_ADDR_SHIFT)
 
+#define GUC_CTL_DEVID			5
+
 #define GUC_CTL_MAX_DWORDS		(SOFT_SCRATCH_COUNT - 2) /* [1..14] */
 
 /* Generic GT SysInfo data types */
@@ -263,7 +271,10 @@ struct guc_mmio_reg {
 	u32 offset;
 	u32 value;
 	u32 flags;
-#define GUC_REGSET_MASKED		(1 << 0)
+	u32 mask;
+#define GUC_REGSET_MASKED		BIT(0)
+#define GUC_REGSET_MASKED_WITH_VALUE	BIT(2)
+#define GUC_REGSET_RESTORE_ONLY		BIT(3)
 } __packed;
 
 /* GuC register sets */
@@ -280,6 +291,12 @@ struct guc_gt_system_info {
 	u32 generic_gt_sysinfo[GUC_GENERIC_GT_SYSINFO_MAX];
 } __packed;
 
+enum {
+	GUC_CAPTURE_LIST_INDEX_PF = 0,
+	GUC_CAPTURE_LIST_INDEX_VF = 1,
+	GUC_CAPTURE_LIST_INDEX_MAX = 2,
+};
+
 /* GuC Additional Data Struct */
 struct guc_ads {
 	struct guc_mmio_reg_set reg_state_list[GUC_MAX_ENGINE_CLASSES][GUC_MAX_INSTANCES_PER_CLASS];
@@ -291,7 +308,11 @@ struct guc_ads {
 	u32 golden_context_lrca[GUC_MAX_ENGINE_CLASSES];
 	u32 eng_state_size[GUC_MAX_ENGINE_CLASSES];
 	u32 private_data;
-	u32 reserved[15];
+	u32 reserved2;
+	u32 capture_instance[GUC_CAPTURE_LIST_INDEX_MAX][GUC_MAX_ENGINE_CLASSES];
+	u32 capture_class[GUC_CAPTURE_LIST_INDEX_MAX][GUC_MAX_ENGINE_CLASSES];
+	u32 capture_global[GUC_CAPTURE_LIST_INDEX_MAX];
+	u32 reserved[14];
 } __packed;
 
 /* Engine usage stats */
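
[Editor's note] The guc_ads layout above is consumed directly by firmware, so carving the capture fields out of the reserved space is an ABI change that must stay in lockstep with the GuC firmware bump later in this series. One generic way to guard such shared structs — not taken from this patch — is to pin the offsets with _Static_assert; everything in this sketch is invented:

#include <stdint.h>
#include <stddef.h>

struct fw_shared {
	uint32_t config;
	uint32_t private_data;
	uint32_t reserved2;
	uint32_t capture_global[2];
	uint32_t reserved[14];
} __attribute__((packed));

/* fail the build if anyone moves a field the firmware depends on */
_Static_assert(offsetof(struct fw_shared, private_data) == 4,
	       "private_data moved; firmware ABI broken");
_Static_assert(offsetof(struct fw_shared, capture_global) == 12,
	       "capture_global moved; firmware ABI broken");
_Static_assert(sizeof(struct fw_shared) == 76,
	       "struct size changed; firmware ABI broken");

int main(void) { return 0; }
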
@@ -312,6 +333,7 @@ struct guc_engine_usage {
 enum guc_log_buffer_type {
 	GUC_DEBUG_LOG_BUFFER,
 	GUC_CRASH_DUMP_LOG_BUFFER,
+	GUC_CAPTURE_LOG_BUFFER,
 	GUC_MAX_LOG_BUFFER
 };
 
@@ -342,6 +364,7 @@ struct guc_log_buffer_state {
 	u32 write_ptr;
 	u32 size;
 	u32 sampled_write_ptr;
+	u32 wrap_offset;
 	union {
 		struct {
 			u32 flush_to_file:1;
@@ -382,7 +405,7 @@ struct guc_shared_ctx_data {
 /* This action will be programmed in C1BC - SOFT_SCRATCH_15_REG */
 enum intel_guc_recv_message {
 	INTEL_GUC_RECV_MSG_CRASH_DUMP_POSTED = BIT(1),
-	INTEL_GUC_RECV_MSG_FLUSH_LOG_BUFFER = BIT(3)
+	INTEL_GUC_RECV_MSG_EXCEPTION = BIT(30),
 };
 
 #endif
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c
index 7b0b43e87244c9e599f2c486412281278b7f9289..b53f61f3101fe66af1e9e90418fbc8b34a8198d9 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c
@@ -56,20 +56,6 @@ static int guc_action_control_log(struct intel_guc *guc, bool enable,
 	return intel_guc_send(guc, action, ARRAY_SIZE(action));
 }
 
-static void guc_log_enable_flush_events(struct intel_guc_log *log)
-{
-	intel_guc_enable_msg(log_to_guc(log),
-			     INTEL_GUC_RECV_MSG_FLUSH_LOG_BUFFER |
-			     INTEL_GUC_RECV_MSG_CRASH_DUMP_POSTED);
-}
-
-static void guc_log_disable_flush_events(struct intel_guc_log *log)
-{
-	intel_guc_disable_msg(log_to_guc(log),
-			      INTEL_GUC_RECV_MSG_FLUSH_LOG_BUFFER |
-			      INTEL_GUC_RECV_MSG_CRASH_DUMP_POSTED);
-}
-
 /*
  * Sub buffer switch callback. Called whenever relay has to switch to a new
  * sub buffer, relay stays on the same sub buffer if 0 is returned.
@@ -202,6 +188,8 @@ static unsigned int guc_get_log_buffer_size(enum guc_log_buffer_type type)
 		return DEBUG_BUFFER_SIZE;
 	case GUC_CRASH_DUMP_LOG_BUFFER:
 		return CRASH_BUFFER_SIZE;
+	case GUC_CAPTURE_LOG_BUFFER:
+		return CAPTURE_BUFFER_SIZE;
 	default:
 		MISSING_CASE(type);
 	}
@@ -464,14 +452,19 @@ int intel_guc_log_create(struct intel_guc_log *log)
 	 *  +-------------------------------+ 32B
 	 *  |      Debug state header       |
 	 *  +-------------------------------+ 64B
+	 *  |     Capture state header      |
+	 *  +-------------------------------+ 96B
 	 *  |                               |
 	 *  +===============================+ PAGE_SIZE (4KB)
 	 *  |        Crash Dump logs        |
 	 *  +===============================+ + CRASH_SIZE
 	 *  |          Debug logs           |
 	 *  +===============================+ + DEBUG_SIZE
+	 *  |         Capture logs          |
+	 *  +===============================+ + CAPTURE_SIZE
 	 */
-	guc_log_size = PAGE_SIZE + CRASH_BUFFER_SIZE + DEBUG_BUFFER_SIZE;
+	guc_log_size = PAGE_SIZE + CRASH_BUFFER_SIZE + DEBUG_BUFFER_SIZE +
+		       CAPTURE_BUFFER_SIZE;
 
 	vma = intel_guc_allocate_vma(guc, guc_log_size);
 	if (IS_ERR(vma)) {
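
[Editor's note] With the capture region appended, the blob is one 4K page of state headers followed by three back-to-back data regions. The offset math, using the default (non-debug) sizes defined in intel_guc_log.h:

#include <stdio.h>

#define PAGE_SIZE	4096u
#define CRASH_SIZE	(8u << 10)	/* SZ_8K  */
#define DEBUG_SIZE	(64u << 10)	/* SZ_64K */
#define CAPTURE_SIZE	(16u << 10)	/* SZ_16K */

int main(void)
{
	unsigned int crash_off   = PAGE_SIZE;			/* after headers */
	unsigned int debug_off   = crash_off + CRASH_SIZE;
	unsigned int capture_off = debug_off + DEBUG_SIZE;
	unsigned int total       = capture_off + CAPTURE_SIZE;

	printf("crash   @ %#x\n", crash_off);
	printf("debug   @ %#x\n", debug_off);
	printf("capture @ %#x\n", capture_off);
	printf("total   = %u bytes\n", total);
	return 0;
}
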
@@ -593,8 +586,6 @@ int intel_guc_log_relay_start(struct intel_guc_log *log)
 	if (log->relay.started)
 		return -EEXIST;
 
-	guc_log_enable_flush_events(log);
-
 	/*
 	 * When GuC is logging without us relaying to userspace, we're ignoring
 	 * the flush notification. This means that we need to unconditionally
@@ -641,7 +632,6 @@ static void guc_log_relay_stop(struct intel_guc_log *log)
 	if (!log->relay.started)
 		return;
 
-	guc_log_disable_flush_events(log);
 	intel_synchronize_irq(i915);
 
 	flush_work(&log->relay.flush_work);
@@ -662,7 +652,8 @@ void intel_guc_log_relay_close(struct intel_guc_log *log)
 
 void intel_guc_log_handle_flush_event(struct intel_guc_log *log)
 {
-	queue_work(system_highpri_wq, &log->relay.flush_work);
+	if (log->relay.started)
+		queue_work(system_highpri_wq, &log->relay.flush_work);
 }
 
 static const char *
@@ -673,6 +664,8 @@ stringify_guc_log_type(enum guc_log_buffer_type type)
 		return "DEBUG";
 	case GUC_CRASH_DUMP_LOG_BUFFER:
 		return "CRASH";
+	case GUC_CAPTURE_LOG_BUFFER:
+		return "CAPTURE";
 	default:
 		MISSING_CASE(type);
 	}
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h
index fe6ab7550a148f9964b68cfe481ba8bae6e38744..d7e1b6471feddd35bda16bfefe42dfd53a1927e5 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h
@@ -18,12 +18,15 @@ struct intel_guc;
 #if defined(CONFIG_DRM_I915_DEBUG_GUC)
 #define CRASH_BUFFER_SIZE	SZ_2M
 #define DEBUG_BUFFER_SIZE	SZ_16M
+#define CAPTURE_BUFFER_SIZE	SZ_4M
 #elif defined(CONFIG_DRM_I915_DEBUG_GEM)
 #define CRASH_BUFFER_SIZE	SZ_1M
 #define DEBUG_BUFFER_SIZE	SZ_2M
+#define CAPTURE_BUFFER_SIZE	SZ_1M
 #else
 #define CRASH_BUFFER_SIZE	SZ_8K
 #define DEBUG_BUFFER_SIZE	SZ_64K
+#define CAPTURE_BUFFER_SIZE	SZ_16K
 #endif
 
 /*
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_reg.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_reg.h
index 85846c5570c52189443d963fbe8315ac616470ba..66027a42cda9e0e2f56fc09d36224c57e5e9b85e 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_reg.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_reg.h
@@ -22,10 +22,6 @@
 #define   GS_BOOTROM_JUMP_PASSED	  (0x76 << GS_BOOTROM_SHIFT)
 #define   GS_UKERNEL_SHIFT		8
 #define   GS_UKERNEL_MASK		  (0xFF << GS_UKERNEL_SHIFT)
-#define   GS_UKERNEL_LAPIC_DONE		  (0x30 << GS_UKERNEL_SHIFT)
-#define   GS_UKERNEL_DPC_ERROR		  (0x60 << GS_UKERNEL_SHIFT)
-#define   GS_UKERNEL_EXCEPTION		  (0x70 << GS_UKERNEL_SHIFT)
-#define   GS_UKERNEL_READY		  (0xF0 << GS_UKERNEL_SHIFT)
 #define   GS_MIA_SHIFT			16
 #define   GS_MIA_MASK			  (0x07 << GS_MIA_SHIFT)
 #define   GS_MIA_CORE_STATE		  (0x01 << GS_MIA_SHIFT)
@@ -98,6 +94,9 @@
 #define   GUC_ENABLE_MIA_CLOCK_GATING		(1<<15)
 #define   GUC_GEN10_SHIM_WC_ENABLE		(1<<21)
 
+#define GUC_SHIM_CONTROL2		_MMIO(0xc068)
+#define   GUC_IS_PRIVILEGED		(1<<29)
+
 #define GUC_SEND_INTERRUPT		_MMIO(0xc4c8)
 #define   GUC_SEND_TRIGGER		  (1<<0)
 #define GEN11_GUC_HOST_INTERRUPT	_MMIO(0x1901f0)
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
index ddbea939b1dc3f9126ea771f294ef1403b2c40d3..b3d28b003b73a1466afedb44c4d0b4cb6bb10ef7 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
@@ -3,9 +3,12 @@
  * Copyright © 2021 Intel Corporation
  */
 
+#include <drm/drm_cache.h>
+
 #include "i915_drv.h"
 #include "i915_reg.h"
 #include "intel_guc_slpc.h"
+#include "intel_mchbar_regs.h"
 #include "gt/intel_gt.h"
 #include "gt/intel_gt_regs.h"
 
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index 04b8321fc7587ed05849f22f922dec877cc03bd6..b3a429a92c0da85c1532773253f927b8d4710d63 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -1430,7 +1430,8 @@ submission_disabled(struct intel_guc *guc)
 	struct i915_sched_engine * const sched_engine = guc->sched_engine;
 
 	return unlikely(!sched_engine ||
-			!__tasklet_is_enabled(&sched_engine->tasklet));
+			!__tasklet_is_enabled(&sched_engine->tasklet) ||
+			intel_gt_is_wedged(guc_to_gt(guc)));
 }
 
 static void disable_submission(struct intel_guc *guc)
@@ -1475,8 +1476,6 @@ static void guc_flush_destroyed_contexts(struct intel_guc *guc);
 
 void intel_guc_submission_reset_prepare(struct intel_guc *guc)
 {
-	int i;
-
 	if (unlikely(!guc_submission_initialized(guc))) {
 		/* Reset called during driver load? GuC not yet initialised! */
 		return;
@@ -1493,21 +1492,7 @@ void intel_guc_submission_reset_prepare(struct intel_guc *guc)
 
 	guc_flush_submissions(guc);
 	guc_flush_destroyed_contexts(guc);
-
-	/*
-	 * Handle any outstanding G2Hs before reset. Call IRQ handler directly
-	 * each pass as interrupt have been disabled. We always scrub for
-	 * outstanding G2H as it is possible for outstanding_submission_g2h to
-	 * be incremented after the context state update.
-	 */
-	for (i = 0; i < 4 && atomic_read(&guc->outstanding_submission_g2h); ++i) {
-		intel_guc_to_host_event_handler(guc);
-#define wait_for_reset(guc, wait_var) \
-		intel_guc_wait_for_pending_msg(guc, wait_var, false, (HZ / 20))
-		do {
-			wait_for_reset(guc, &guc->outstanding_submission_g2h);
-		} while (!list_empty(&guc->ct.requests.incoming));
-	}
+	flush_work(&guc->ct.requests.worker);
 
 	scrub_guc_desc_for_outstanding_g2h(guc);
 }
@@ -1612,7 +1597,6 @@ static void __guc_reset_context(struct intel_context *ce, bool stalled)
 	unsigned long flags;
 	u32 head;
 	int i, number_children = ce->parallel.number_children;
-	bool skip = false;
 	struct intel_context *parent = ce;
 
 	GEM_BUG_ON(intel_context_is_child(ce));
@@ -1623,23 +1607,10 @@ static void __guc_reset_context(struct intel_context *ce, bool stalled)
 	 * GuC will implicitly mark the context as non-schedulable when it sends
 	 * the reset notification. Make sure our state reflects this change. The
 	 * context will be marked enabled on resubmission.
-	 *
-	 * XXX: If the context is reset as a result of the request cancellation
-	 * this G2H is received after the schedule disable complete G2H which is
-	 * wrong as this creates a race between the request cancellation code
-	 * re-submitting the context and this G2H handler. This is a bug in the
-	 * GuC but can be worked around in the meantime but converting this to a
-	 * NOP if a pending enable is in flight as this indicates that a request
-	 * cancellation has occurred.
 	 */
 	spin_lock_irqsave(&ce->guc_state.lock, flags);
-	if (likely(!context_pending_enable(ce)))
-		clr_context_enabled(ce);
-	else
-		skip = true;
+	clr_context_enabled(ce);
 	spin_unlock_irqrestore(&ce->guc_state.lock, flags);
-	if (unlikely(skip))
-		goto out_put;
 
 	/*
 	 * For each context in the relationship find the hanging request
@@ -1671,7 +1642,6 @@ static void __guc_reset_context(struct intel_context *ce, bool stalled)
 	}
 
 	__unwind_incomplete_requests(parent);
-out_put:
 	intel_context_put(parent);
 }
 
@@ -1806,7 +1776,7 @@ void intel_guc_submission_reset_finish(struct intel_guc *guc)
 {
 	/* Reset called during driver load or during wedge? */
 	if (unlikely(!guc_submission_initialized(guc) ||
-		     test_bit(I915_WEDGED, &guc_to_gt(guc)->reset.flags))) {
+		     intel_gt_is_wedged(guc_to_gt(guc)))) {
 		return;
 	}
 
@@ -1825,6 +1795,7 @@ void intel_guc_submission_reset_finish(struct intel_guc *guc)
 }
 
 static void destroyed_worker_func(struct work_struct *w);
+static void reset_fail_worker_func(struct work_struct *w);
 
 /*
  * Set up the memory resources to be shared with the GuC (via the GGTT)
@@ -1855,6 +1826,8 @@ int intel_guc_submission_init(struct intel_guc *guc)
 	INIT_LIST_HEAD(&guc->submission_state.destroyed_contexts);
 	INIT_WORK(&guc->submission_state.destroyed_worker,
 		  destroyed_worker_func);
+	INIT_WORK(&guc->submission_state.reset_fail_worker,
+		  reset_fail_worker_func);
 
 	guc->submission_state.guc_ids_bitmap =
 		bitmap_zalloc(NUMBER_MULTI_LRC_GUC_ID(guc), GFP_KERNEL);
@@ -2611,12 +2584,6 @@ static void guc_context_cancel_request(struct intel_context *ce,
 					true);
 		}
 
-		/*
-		 * XXX: Racey if context is reset, see comment in
-		 * __guc_reset_context().
-		 */
-		flush_work(&ce_to_guc(ce)->ct.requests.worker);
-
 		guc_context_unblock(block_context);
 		intel_context_put(ce);
 	}
@@ -3330,8 +3297,6 @@ static void guc_parent_context_unpin(struct intel_context *ce)
 	GEM_BUG_ON(!intel_context_is_parent(ce));
 	GEM_BUG_ON(!intel_engine_is_virtual(ce->engine));
 
-	if (ce->parallel.last_rq)
-		i915_request_put(ce->parallel.last_rq);
 	unpin_guc_id(guc, ce);
 	lrc_unpin(ce);
 }
@@ -4053,14 +4018,14 @@ static void guc_handle_context_reset(struct intel_guc *guc,
 {
 	trace_intel_context_reset(ce);
 
-	/*
-	 * XXX: Racey if request cancellation has occurred, see comment in
-	 * __guc_reset_context().
-	 */
-	if (likely(!intel_context_is_banned(ce) &&
-		   !context_blocked(ce))) {
+	if (likely(!intel_context_is_banned(ce))) {
 		capture_error_state(guc, ce);
 		guc_context_replay(ce);
+	} else {
+		drm_err(&guc_to_gt(guc)->i915->drm,
+			"Invalid GuC engine reset notificaion for 0x%04X on %s: banned = %d, blocked = %d",
+			ce->guc_id.id, ce->engine->name, intel_context_is_banned(ce),
+			context_blocked(ce));
 	}
 }
 
@@ -4099,6 +4064,24 @@ int intel_guc_context_reset_process_msg(struct intel_guc *guc,
 	return 0;
 }
 
+int intel_guc_error_capture_process_msg(struct intel_guc *guc,
+					const u32 *msg, u32 len)
+{
+	int status;
+
+	if (unlikely(len != 1)) {
+		drm_dbg(&guc_to_gt(guc)->i915->drm, "Invalid length %u", len);
+		return -EPROTO;
+	}
+
+	status = msg[0];
+	drm_info(&guc_to_gt(guc)->i915->drm, "Got error capture: status = %d", status);
+
+	/* FIXME: Do something with the capture */
+
+	return 0;
+}
+
 static struct intel_engine_cs *
 guc_lookup_engine(struct intel_guc *guc, u8 guc_class, u8 instance)
 {
@@ -4111,6 +4094,26 @@ guc_lookup_engine(struct intel_guc *guc, u8 guc_class, u8 instance)
 	return gt->engine_class[engine_class][instance];
 }
 
+static void reset_fail_worker_func(struct work_struct *w)
+{
+	struct intel_guc *guc = container_of(w, struct intel_guc,
+					     submission_state.reset_fail_worker);
+	struct intel_gt *gt = guc_to_gt(guc);
+	intel_engine_mask_t reset_fail_mask;
+	unsigned long flags;
+
+	spin_lock_irqsave(&guc->submission_state.lock, flags);
+	reset_fail_mask = guc->submission_state.reset_fail_mask;
+	guc->submission_state.reset_fail_mask = 0;
+	spin_unlock_irqrestore(&guc->submission_state.lock, flags);
+
+	if (likely(reset_fail_mask))
+		intel_gt_handle_error(gt, reset_fail_mask,
+				      I915_ERROR_CAPTURE,
+				      "GuC failed to reset engine mask=0x%x\n",
+				      reset_fail_mask);
+}
+
 int intel_guc_engine_failure_process_msg(struct intel_guc *guc,
 					 const u32 *msg, u32 len)
 {
@@ -4118,6 +4121,7 @@ int intel_guc_engine_failure_process_msg(struct intel_guc *guc,
 	struct intel_gt *gt = guc_to_gt(guc);
 	u8 guc_class, instance;
 	u32 reason;
+	unsigned long flags;
 
 	if (unlikely(len != 3)) {
 		drm_err(&gt->i915->drm, "Invalid length %u", len);
@@ -4142,10 +4146,15 @@ int intel_guc_engine_failure_process_msg(struct intel_guc *guc,
 	drm_err(&gt->i915->drm, "GuC engine reset request failed on %d:%d (%s) because 0x%08X",
 		guc_class, instance, engine->name, reason);
 
-	intel_gt_handle_error(gt, engine->mask,
-			      I915_ERROR_CAPTURE,
-			      "GuC failed to reset %s (reason=0x%08x)\n",
-			      engine->name, reason);
+	spin_lock_irqsave(&guc->submission_state.lock, flags);
+	guc->submission_state.reset_fail_mask |= engine->mask;
+	spin_unlock_irqrestore(&guc->submission_state.lock, flags);
+
+	/*
+	 * A GT reset flushes this worker queue (G2H handler) so we must use
+	 * another worker to trigger a GT reset.
+	 */
+	queue_work(system_unbound_wq, &guc->submission_state.reset_fail_worker);
 
 	return 0;
 }
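
[Editor's note] The G2H handler runs on the very worker queue that a GT reset flushes, so triggering the reset inline would deadlock; instead the failure mask is accumulated under the submission lock and a dedicated worker drains it. The pattern, reduced to a plain pthread sketch:

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static unsigned int reset_fail_mask;

static void note_engine_failure(unsigned int engine_mask)
{
	pthread_mutex_lock(&lock);
	reset_fail_mask |= engine_mask;	/* cheap: just record it */
	pthread_mutex_unlock(&lock);
}

static void *reset_worker(void *arg)
{
	unsigned int mask;

	(void)arg;
	pthread_mutex_lock(&lock);
	mask = reset_fail_mask;		/* take a snapshot ... */
	reset_fail_mask = 0;		/* ... and clear the shared state */
	pthread_mutex_unlock(&lock);

	if (mask)
		printf("resetting engines mask=%#x\n", mask);
	return NULL;
}

int main(void)
{
	pthread_t t;

	note_engine_failure(0x1);
	note_engine_failure(0x4);
	pthread_create(&t, NULL, reset_worker, NULL);
	pthread_join(t, NULL);
	return 0;
}
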
@@ -4514,27 +4523,31 @@ static inline bool skip_handshake(struct i915_request *rq)
 	return test_bit(I915_FENCE_FLAG_SKIP_PARALLEL, &rq->fence.flags);
 }
 
+#define NON_SKIP_LEN	6
 static u32 *
 emit_fini_breadcrumb_parent_no_preempt_mid_batch(struct i915_request *rq,
 						 u32 *cs)
 {
 	struct intel_context *ce = rq->context;
+	__maybe_unused u32 *before_fini_breadcrumb_user_interrupt_cs;
+	__maybe_unused u32 *start_fini_breadcrumb_cs = cs;
 
 	GEM_BUG_ON(!intel_context_is_parent(ce));
 
 	if (unlikely(skip_handshake(rq))) {
 		/*
 		 * NOP everything in __emit_fini_breadcrumb_parent_no_preempt_mid_batch,
-		 * the -6 comes from the length of the emits below.
+		 * the NON_SKIP_LEN comes from the length of the emits below.
 		 */
 		memset(cs, 0, sizeof(u32) *
-		       (ce->engine->emit_fini_breadcrumb_dw - 6));
-		cs += ce->engine->emit_fini_breadcrumb_dw - 6;
+		       (ce->engine->emit_fini_breadcrumb_dw - NON_SKIP_LEN));
+		cs += ce->engine->emit_fini_breadcrumb_dw - NON_SKIP_LEN;
 	} else {
 		cs = __emit_fini_breadcrumb_parent_no_preempt_mid_batch(rq, cs);
 	}
 
 	/* Emit fini breadcrumb */
+	before_fini_breadcrumb_user_interrupt_cs = cs;
 	cs = gen8_emit_ggtt_write(cs,
 				  rq->fence.seqno,
 				  i915_request_active_timeline(rq)->hwsp_offset,
@@ -4544,6 +4557,12 @@ emit_fini_breadcrumb_parent_no_preempt_mid_batch(struct i915_request *rq,
 	*cs++ = MI_USER_INTERRUPT;
 	*cs++ = MI_NOOP;
 
+	/* Ensure our math for skip + emit is correct */
+	GEM_BUG_ON(before_fini_breadcrumb_user_interrupt_cs + NON_SKIP_LEN !=
+		   cs);
+	GEM_BUG_ON(start_fini_breadcrumb_cs +
+		   ce->engine->emit_fini_breadcrumb_dw != cs);
+
 	rq->tail = intel_ring_offset(rq, cs);
 
 	return cs;
@@ -4586,22 +4605,25 @@ emit_fini_breadcrumb_child_no_preempt_mid_batch(struct i915_request *rq,
 						u32 *cs)
 {
 	struct intel_context *ce = rq->context;
+	__maybe_unused u32 *before_fini_breadcrumb_user_interrupt_cs;
+	__maybe_unused u32 *start_fini_breadcrumb_cs = cs;
 
 	GEM_BUG_ON(!intel_context_is_child(ce));
 
 	if (unlikely(skip_handshake(rq))) {
 		/*
 		 * NOP everything in __emit_fini_breadcrumb_child_no_preempt_mid_batch,
-		 * the -6 comes from the length of the emits below.
+		 * the NON_SKIP_LEN comes from the length of the emits below.
 		 */
 		memset(cs, 0, sizeof(u32) *
-		       (ce->engine->emit_fini_breadcrumb_dw - 6));
-		cs += ce->engine->emit_fini_breadcrumb_dw - 6;
+		       (ce->engine->emit_fini_breadcrumb_dw - NON_SKIP_LEN));
+		cs += ce->engine->emit_fini_breadcrumb_dw - NON_SKIP_LEN;
 	} else {
 		cs = __emit_fini_breadcrumb_child_no_preempt_mid_batch(rq, cs);
 	}
 
 	/* Emit fini breadcrumb */
+	before_fini_breadcrumb_user_interrupt_cs = cs;
 	cs = gen8_emit_ggtt_write(cs,
 				  rq->fence.seqno,
 				  i915_request_active_timeline(rq)->hwsp_offset,
@@ -4611,11 +4633,19 @@ emit_fini_breadcrumb_child_no_preempt_mid_batch(struct i915_request *rq,
 	*cs++ = MI_USER_INTERRUPT;
 	*cs++ = MI_NOOP;
 
+	/* Ensure our math for skip + emit is correct */
+	GEM_BUG_ON(before_fini_breadcrumb_user_interrupt_cs + NON_SKIP_LEN !=
+		   cs);
+	GEM_BUG_ON(start_fini_breadcrumb_cs +
+		   ce->engine->emit_fini_breadcrumb_dw != cs);
+
 	rq->tail = intel_ring_offset(rq, cs);
 
 	return cs;
 }
 
+#undef NON_SKIP_LEN
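
[Editor's note] The skip path NOPs the handshake prefix but always emits the fixed NON_SKIP_LEN tail, so the breadcrumb lands at a stable offset and the new GEM_BUG_ONs can verify the pointer arithmetic. A self-contained model of that math, with invented dword counts:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define TOTAL_DW	16	/* stand-in for emit_fini_breadcrumb_dw */
#define NON_SKIP_LEN	6	/* always-emitted tail */

static uint32_t *emit_tail(uint32_t *cs)
{
	for (int i = 0; i < NON_SKIP_LEN; i++)
		*cs++ = 0xdead0000u | i;	/* seqno write + interrupt */
	return cs;
}

int main(void)
{
	uint32_t ring[TOTAL_DW];
	uint32_t *start = ring, *cs = ring;
	const int skip = 1;

	if (skip) {
		/* NOP everything except the fixed-length tail */
		memset(cs, 0, sizeof(uint32_t) * (TOTAL_DW - NON_SKIP_LEN));
		cs += TOTAL_DW - NON_SKIP_LEN;
	} else {
		/* ... emit the real handshake dwords here ... */
		cs += TOTAL_DW - NON_SKIP_LEN;
	}

	uint32_t *before_tail = cs;
	cs = emit_tail(cs);

	/* the same sanity checks the patch adds as GEM_BUG_ONs */
	assert(before_tail + NON_SKIP_LEN == cs);
	assert(start + TOTAL_DW == cs);
	printf("emitted %td dwords\n", cs - start);
	return 0;
}
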
+
 static struct intel_context *
 guc_create_virtual(struct intel_engine_cs **siblings, unsigned int count,
 		   unsigned long flags)
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc.c b/drivers/gpu/drm/i915/gt/uc/intel_huc.c
index d10b227ac4aa0347f2963c5f9434be9cccab732f..556829de9c17287dc8f2ee598627fed91caf123d 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_huc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_huc.c
@@ -124,6 +124,7 @@ int intel_huc_auth(struct intel_huc *huc)
 	}
 
 	intel_uc_fw_change_status(&huc->fw, INTEL_UC_FIRMWARE_RUNNING);
+	drm_info(&gt->i915->drm, "HuC authenticated\n");
 	return 0;
 
 fail:
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
index 09ed29df67bc9d888cf9f62166d5e6edf74dd285..da199aa6989fba35fcc129f48f8d86d5dd886ed7 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
@@ -432,6 +432,15 @@ static int __uc_check_hw(struct intel_uc *uc)
 	return 0;
 }
 
+static void print_fw_ver(struct intel_uc *uc, struct intel_uc_fw *fw)
+{
+	struct drm_i915_private *i915 = uc_to_gt(uc)->i915;
+
+	drm_info(&i915->drm, "%s firmware %s version %u.%u\n",
+		 intel_uc_fw_type_repr(fw->type), fw->path,
+		 fw->major_ver_found, fw->minor_ver_found);
+}
+
 static int __uc_init_hw(struct intel_uc *uc)
 {
 	struct drm_i915_private *i915 = uc_to_gt(uc)->i915;
@@ -442,6 +451,11 @@ static int __uc_init_hw(struct intel_uc *uc)
 	GEM_BUG_ON(!intel_uc_supports_guc(uc));
 	GEM_BUG_ON(!intel_uc_wants_guc(uc));
 
+	print_fw_ver(uc, &guc->fw);
+
+	if (intel_uc_uses_huc(uc))
+		print_fw_ver(uc, &huc->fw);
+
 	if (!intel_uc_fw_is_loadable(&guc->fw)) {
 		ret = __uc_check_hw(uc) ||
 		      intel_uc_fw_is_overridden(&guc->fw) ||
@@ -507,24 +521,11 @@ static int __uc_init_hw(struct intel_uc *uc)
 		intel_rps_lower_unslice(&uc_to_gt(uc)->rps);
 	}
 
-	drm_info(&i915->drm, "%s firmware %s version %u.%u %s:%s\n",
-		 intel_uc_fw_type_repr(INTEL_UC_FW_TYPE_GUC), guc->fw.path,
-		 guc->fw.major_ver_found, guc->fw.minor_ver_found,
-		 "submission",
+	drm_info(&i915->drm, "GuC submission %s\n",
 		 enableddisabled(intel_uc_uses_guc_submission(uc)));
-
-	drm_info(&i915->drm, "GuC SLPC: %s\n",
+	drm_info(&i915->drm, "GuC SLPC %s\n",
 		 enableddisabled(intel_uc_uses_guc_slpc(uc)));
 
-	if (intel_uc_uses_huc(uc)) {
-		drm_info(&i915->drm, "%s firmware %s version %u.%u %s:%s\n",
-			 intel_uc_fw_type_repr(INTEL_UC_FW_TYPE_HUC),
-			 huc->fw.path,
-			 huc->fw.major_ver_found, huc->fw.minor_ver_found,
-			 "authenticated",
-			 yesno(intel_huc_is_authenticated(huc)));
-	}
-
 	return 0;
 
 	/*
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
index dd588ecf048a32d68bcb28629b70c91817187474..c881130444948ec37bfca7101c34a5efd756de56 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
@@ -5,6 +5,8 @@
 
 #include <linux/bitfield.h>
 #include <linux/firmware.h>
+
+#include <drm/drm_cache.h>
 #include <drm/drm_print.h>
 
 #include "gem/i915_gem_lmem.h"
@@ -50,21 +52,21 @@ void intel_uc_fw_change_status(struct intel_uc_fw *uc_fw,
  * firmware as TGL.
  */
 #define INTEL_GUC_FIRMWARE_DEFS(fw_def, guc_def) \
-	fw_def(ALDERLAKE_P,  0, guc_def(adlp, 62, 0, 3)) \
-	fw_def(ALDERLAKE_S,  0, guc_def(tgl,  62, 0, 0)) \
-	fw_def(DG1,          0, guc_def(dg1,  62, 0, 0)) \
-	fw_def(ROCKETLAKE,   0, guc_def(tgl,  62, 0, 0)) \
-	fw_def(TIGERLAKE,    0, guc_def(tgl,  62, 0, 0)) \
-	fw_def(JASPERLAKE,   0, guc_def(ehl,  62, 0, 0)) \
-	fw_def(ELKHARTLAKE,  0, guc_def(ehl,  62, 0, 0)) \
-	fw_def(ICELAKE,      0, guc_def(icl,  62, 0, 0)) \
-	fw_def(COMETLAKE,    5, guc_def(cml,  62, 0, 0)) \
-	fw_def(COMETLAKE,    0, guc_def(kbl,  62, 0, 0)) \
-	fw_def(COFFEELAKE,   0, guc_def(kbl,  62, 0, 0)) \
-	fw_def(GEMINILAKE,   0, guc_def(glk,  62, 0, 0)) \
-	fw_def(KABYLAKE,     0, guc_def(kbl,  62, 0, 0)) \
-	fw_def(BROXTON,      0, guc_def(bxt,  62, 0, 0)) \
-	fw_def(SKYLAKE,      0, guc_def(skl,  62, 0, 0))
+	fw_def(ALDERLAKE_P,  0, guc_def(adlp, 69, 0, 3)) \
+	fw_def(ALDERLAKE_S,  0, guc_def(tgl,  69, 0, 3)) \
+	fw_def(DG1,          0, guc_def(dg1,  69, 0, 3)) \
+	fw_def(ROCKETLAKE,   0, guc_def(tgl,  69, 0, 3)) \
+	fw_def(TIGERLAKE,    0, guc_def(tgl,  69, 0, 3)) \
+	fw_def(JASPERLAKE,   0, guc_def(ehl,  69, 0, 3)) \
+	fw_def(ELKHARTLAKE,  0, guc_def(ehl,  69, 0, 3)) \
+	fw_def(ICELAKE,      0, guc_def(icl,  69, 0, 3)) \
+	fw_def(COMETLAKE,    5, guc_def(cml,  69, 0, 3)) \
+	fw_def(COMETLAKE,    0, guc_def(kbl,  69, 0, 3)) \
+	fw_def(COFFEELAKE,   0, guc_def(kbl,  69, 0, 3)) \
+	fw_def(GEMINILAKE,   0, guc_def(glk,  69, 0, 3)) \
+	fw_def(KABYLAKE,     0, guc_def(kbl,  69, 0, 3)) \
+	fw_def(BROXTON,      0, guc_def(bxt,  69, 0, 3)) \
+	fw_def(SKYLAKE,      0, guc_def(skl,  69, 0, 3))
 
 #define INTEL_HUC_FIRMWARE_DEFS(fw_def, huc_def) \
 	fw_def(ALDERLAKE_P,  0, huc_def(tgl,  7, 9, 3)) \
@@ -449,20 +451,19 @@ static void uc_fw_bind_ggtt(struct intel_uc_fw *uc_fw)
 {
 	struct drm_i915_gem_object *obj = uc_fw->obj;
 	struct i915_ggtt *ggtt = __uc_fw_to_gt(uc_fw)->ggtt;
-	struct i915_vma *dummy = &uc_fw->dummy;
+	struct i915_vma_resource *dummy = &uc_fw->dummy;
 	u32 pte_flags = 0;
 
-	dummy->node.start = uc_fw_ggtt_offset(uc_fw);
-	dummy->node.size = obj->base.size;
-	dummy->pages = obj->mm.pages;
-	dummy->vm = &ggtt->vm;
+	dummy->start = uc_fw_ggtt_offset(uc_fw);
+	dummy->node_size = obj->base.size;
+	dummy->bi.pages = obj->mm.pages;
 
 	GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
-	GEM_BUG_ON(dummy->node.size > ggtt->uc_fw.size);
+	GEM_BUG_ON(dummy->node_size > ggtt->uc_fw.size);
 
 	/* uc_fw->obj cache domains were not controlled across suspend */
 	if (i915_gem_object_has_struct_page(obj))
-		drm_clflush_sg(dummy->pages);
+		drm_clflush_sg(dummy->bi.pages);
 
 	if (i915_gem_object_is_lmem(obj))
 		pte_flags |= PTE_LM;
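
[Editor's note] Binding the firmware image now fills a throwaway i915_vma_resource — just an offset, size, and backing pages — instead of faking up a full i915_vma. The underlying idea, reduced to a generic sketch with invented names:

#include <stdint.h>
#include <stdio.h>

/* minimal descriptor: all a bind path actually needs */
struct mapping_resource {
	uint64_t start;		/* address-space offset to bind at */
	uint64_t size;		/* bytes to map */
	const void *pages;	/* backing storage handle */
};

static void vm_insert(const struct mapping_resource *res)
{
	printf("mapping %llu bytes at %#llx\n",
	       (unsigned long long)res->size,
	       (unsigned long long)res->start);
}

int main(void)
{
	static const char fw_image[4096];	/* stand-in for fw pages */
	struct mapping_resource dummy = {
		.start = 0x100000,		/* reserved fw range */
		.size  = sizeof(fw_image),
		.pages = fw_image,
	};

	vm_insert(&dummy);	/* reuse the shared insertion routine */
	return 0;
}
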
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h
index d9d1dc0b4cbb89ddedb95173e89c9a4326e2bbea..3229018877d3dbc63fa52e395ca647a606dd2a78 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h
@@ -85,7 +85,7 @@ struct intel_uc_fw {
 	 * threaded as it done during driver load (inherently single threaded)
 	 * or during a GT reset (mutex guarantees single threaded).
 	 */
-	struct i915_vma dummy;
+	struct i915_vma_resource dummy;
 	struct i915_vma *rsa_data;
 
 	/*
diff --git a/drivers/gpu/drm/i915/gt/uc/selftest_guc.c b/drivers/gpu/drm/i915/gt/uc/selftest_guc.c
index d3327b802b76146e0b6801d25d5d45052a0baea6..a115894d5896ea445036a340a0f7b85259f7177e 100644
--- a/drivers/gpu/drm/i915/gt/uc/selftest_guc.c
+++ b/drivers/gpu/drm/i915/gt/uc/selftest_guc.c
@@ -157,7 +157,7 @@ static int intel_guc_steal_guc_ids(void *arg)
 	wakeref = intel_runtime_pm_get(gt->uncore->rpm);
 	engine = intel_selftest_find_any_engine(gt);
 	sv = guc->submission_state.num_guc_ids;
-	guc->submission_state.num_guc_ids = 4096;
+	guc->submission_state.num_guc_ids = 512;
 
 	/* Create spinner to block requests in below loop */
 	ce[context_index] = intel_context_create(engine);
diff --git a/drivers/gpu/drm/i915/gvt/aperture_gm.c b/drivers/gpu/drm/i915/gvt/aperture_gm.c
index 6b3dedd321bb00a1e6c5995ea9ae3617df020a60..557f3314291a8215aa1e541fe024d2053ad927c5 100644
--- a/drivers/gpu/drm/i915/gvt/aperture_gm.c
+++ b/drivers/gpu/drm/i915/gvt/aperture_gm.c
@@ -64,7 +64,7 @@ static int alloc_gm(struct intel_vgpu *vgpu, bool high_gm)
 
 	mutex_lock(&gt->ggtt->vm.mutex);
 	mmio_hw_access_pre(gt);
-	ret = i915_gem_gtt_insert(&gt->ggtt->vm, node,
+	ret = i915_gem_gtt_insert(&gt->ggtt->vm, NULL, node,
 				  size, I915_GTT_PAGE_SIZE,
 				  I915_COLOR_UNEVICTABLE,
 				  start, end, flags);
diff --git a/drivers/gpu/drm/i915/gvt/dmabuf.c b/drivers/gpu/drm/i915/gvt/dmabuf.c
index d02a48a5335a6047265956b79e26dd94974e59a8..c95c25d2addbc7ee22f5ec3bf72686597ebb0fde 100644
--- a/drivers/gpu/drm/i915/gvt/dmabuf.c
+++ b/drivers/gpu/drm/i915/gvt/dmabuf.c
@@ -31,6 +31,11 @@
 #include <linux/dma-buf.h>
 #include <linux/vfio.h>
 
+#include <drm/drm_fourcc.h>
+#include <drm/drm_plane.h>
+
+#include "gem/i915_gem_dmabuf.h"
+
 #include "i915_drv.h"
 #include "i915_reg.h"
 #include "gvt.h"
@@ -85,7 +90,7 @@ static int vgpu_gem_get_pages(
 		kfree(st);
 		return ret;
 	}
-	gtt_entries = (gen8_pte_t __iomem *)dev_priv->ggtt.gsm +
+	gtt_entries = (gen8_pte_t __iomem *)to_gt(dev_priv)->ggtt->gsm +
 		(fb_info->start >> PAGE_SHIFT);
 	for_each_sg(st->sgl, sg, page_num, i) {
 		dma_addr_t dma_addr =
diff --git a/drivers/gpu/drm/i915/gvt/execlist.c b/drivers/gpu/drm/i915/gvt/execlist.c
index c8dcda6d4f0d2ece050408c0659e5fd1f0b53fdb..66d354c4195b4e6a2525f8b353e1bc8d9cd23dd3 100644
--- a/drivers/gpu/drm/i915/gvt/execlist.c
+++ b/drivers/gpu/drm/i915/gvt/execlist.c
@@ -163,7 +163,7 @@ static void emulate_csb_update(struct intel_vgpu_execlist *execlist,
 					       hwsp_gpa + I915_HWS_CSB_BUF0_INDEX * 4 + write_pointer * 8,
 					       status, 8);
 		intel_gvt_hypervisor_write_gpa(vgpu,
-					       hwsp_gpa + intel_hws_csb_write_index(execlist->engine->i915) * 4,
+					       hwsp_gpa + INTEL_HWS_CSB_WRITE_INDEX(execlist->engine->i915) * 4,
 					       &write_pointer, 4);
 	}
 
diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c
index c2ae79092b14fd9aa4ff3b7f273c74f3ee5e1413..520a7e1942f39aa0952761695e34304cef72de68 100644
--- a/drivers/gpu/drm/i915/gvt/handlers.c
+++ b/drivers/gpu/drm/i915/gvt/handlers.c
@@ -40,8 +40,10 @@
 #include "i915_reg.h"
 #include "gvt.h"
 #include "i915_pvinfo.h"
+#include "intel_mchbar_regs.h"
 #include "display/intel_display_types.h"
 #include "display/intel_fbc.h"
+#include "display/vlv_dsi_pll_regs.h"
 #include "gt/intel_gt_regs.h"
 
 /* XXX FIXME i915 has changed PP_XXX definition */
diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c
index 20b82fb036f8cd2a2ffd53a75a2562e2221a1d14..e8d6c76e92341cb1bf3c12385d00efe1dc5210ef 100644
--- a/drivers/gpu/drm/i915/gvt/kvmgt.c
+++ b/drivers/gpu/drm/i915/gvt/kvmgt.c
@@ -46,6 +46,8 @@
 
 #include <linux/nospec.h>
 
+#include <drm/drm_edid.h>
+
 #include "i915_drv.h"
 #include "gvt.h"
 
diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c
index aea4c30645ff99787120d5aff1b085bc62848043..5f6e4163665510c42d019e4d1632d24eff17e220 100644
--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
+++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
@@ -25,6 +25,8 @@
  *
  */
 
+#include <drm/drm_cache.h>
+
 #include "gt/intel_engine.h"
 #include "gt/intel_engine_regs.h"
 #include "gt/intel_gpu_commands.h"
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 4fe1ce1433fed50515b8e5c99fe28ef4b8551078..946bbe57bfe52d4101014a314996a7b490457dba 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -49,6 +49,7 @@
 #include "i915_debugfs_params.h"
 #include "i915_irq.h"
 #include "i915_scheduler.h"
+#include "intel_mchbar_regs.h"
 #include "intel_pm.h"
 
 static inline struct drm_i915_private *node_to_i915(struct drm_info_node *node)
@@ -137,6 +138,17 @@ static const char *stringify_vma_type(const struct i915_vma *vma)
 	return "ppgtt";
 }
 
+static const char *i915_cache_level_str(struct drm_i915_private *i915, int type)
+{
+	switch (type) {
+	case I915_CACHE_NONE: return " uncached";
+	case I915_CACHE_LLC: return HAS_LLC(i915) ? " LLC" : " snooped";
+	case I915_CACHE_L3_LLC: return " L3+LLC";
+	case I915_CACHE_WT: return " WT";
+	default: return "";
+	}
+}
+
 void
 i915_debugfs_describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
 {
@@ -171,7 +183,8 @@ i915_debugfs_describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
 		seq_printf(m, " (%s offset: %08llx, size: %08llx, pages: %s",
 			   stringify_vma_type(vma),
 			   vma->node.start, vma->node.size,
-			   stringify_page_sizes(vma->page_sizes.gtt, NULL, 0));
+			   stringify_page_sizes(vma->resource->page_sizes_gtt,
+						NULL, 0));
 		if (i915_vma_is_ggtt(vma) || i915_vma_is_dpt(vma)) {
 			switch (vma->ggtt_view.type) {
 			case I915_GGTT_VIEW_NORMAL:
@@ -391,9 +404,9 @@ static int i915_swizzle_info(struct seq_file *m, void *data)
 	intel_wakeref_t wakeref;
 
 	seq_printf(m, "bit6 swizzle for X-tiling = %s\n",
-		   swizzle_string(dev_priv->ggtt.bit_6_swizzle_x));
+		   swizzle_string(to_gt(dev_priv)->ggtt->bit_6_swizzle_x));
 	seq_printf(m, "bit6 swizzle for Y-tiling = %s\n",
-		   swizzle_string(dev_priv->ggtt.bit_6_swizzle_y));
+		   swizzle_string(to_gt(dev_priv)->ggtt->bit_6_swizzle_y));
 
 	if (dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES)
 		seq_puts(m, "L-shaped memory detected\n");
diff --git a/drivers/gpu/drm/i915/i915_driver.c b/drivers/gpu/drm/i915/i915_driver.c
index 4b56d4d83e75847a941a8ed29761a86ca14c66cb..1c67ff735f185f9e22b491c10b57845d3dee68f1 100644
--- a/drivers/gpu/drm/i915/i915_driver.c
+++ b/drivers/gpu/drm/i915/i915_driver.c
@@ -62,6 +62,8 @@
 #include "display/intel_vga.h"
 
 #include "gem/i915_gem_context.h"
+#include "gem/i915_gem_create.h"
+#include "gem/i915_gem_dmabuf.h"
 #include "gem/i915_gem_ioctls.h"
 #include "gem/i915_gem_mman.h"
 #include "gem/i915_gem_pm.h"
@@ -71,6 +73,7 @@
 
 #include "pxp/intel_pxp_pm.h"
 
+#include "i915_file_private.h"
 #include "i915_debugfs.h"
 #include "i915_driver.h"
 #include "i915_drv.h"
@@ -574,6 +577,10 @@ static int i915_driver_hw_probe(struct drm_i915_private *dev_priv)
 
 	i915_perf_init(dev_priv);
 
+	ret = intel_gt_assign_ggtt(to_gt(dev_priv));
+	if (ret)
+		goto err_perf;
+
 	ret = i915_ggtt_probe_hw(dev_priv);
 	if (ret)
 		goto err_perf;
@@ -590,8 +597,6 @@ static int i915_driver_hw_probe(struct drm_i915_private *dev_priv)
 	if (ret)
 		goto err_ggtt;
 
-	intel_gt_init_hw_early(to_gt(dev_priv), &dev_priv->ggtt);
-
 	ret = intel_gt_probe_lmem(to_gt(dev_priv));
 	if (ret)
 		goto err_mem_regions;
@@ -1149,7 +1154,7 @@ static int i915_drm_suspend(struct drm_device *dev)
 
 	/* Must be called before GGTT is suspended. */
 	intel_dpt_suspend(dev_priv);
-	i915_ggtt_suspend(&dev_priv->ggtt);
+	i915_ggtt_suspend(to_gt(dev_priv)->ggtt);
 
 	i915_save_display(dev_priv);
 
@@ -1273,7 +1278,7 @@ static int i915_drm_resume(struct drm_device *dev)
 	if (ret)
 		drm_err(&dev_priv->drm, "failed to re-enable GGTT\n");
 
-	i915_ggtt_resume(&dev_priv->ggtt);
+	i915_ggtt_resume(to_gt(dev_priv)->ggtt);
 	/* Must be called after GGTT is resumed. */
 	intel_dpt_resume(dev_priv);
 
@@ -1821,6 +1826,21 @@ static const struct drm_ioctl_desc i915_ioctls[] = {
 	DRM_IOCTL_DEF_DRV(I915_GEM_VM_DESTROY, i915_gem_vm_destroy_ioctl, DRM_RENDER_ALLOW),
 };
 
+/*
+ * Interface history:
+ *
+ * 1.1: Original.
+ * 1.2: Add Power Management
+ * 1.3: Add vblank support
+ * 1.4: Fix cmdbuffer path, add heap destroy
+ * 1.5: Add vblank pipe configuration
+ * 1.6: - New ioctl for scheduling buffer swaps on vertical blank
+ *      - Support vertical blank on secondary display pipe
+ */
+#define DRIVER_MAJOR		1
+#define DRIVER_MINOR		6
+#define DRIVER_PATCHLEVEL	0
+
 static const struct drm_driver i915_drm_driver = {
 	/* Don't use MTRRs here; the Xserver or userspace app should
 	 * deal with them for Intel hardware.
diff --git a/drivers/gpu/drm/i915/i915_driver.h b/drivers/gpu/drm/i915/i915_driver.h
index 9ef8db4aa0a6f596e7348d2517e129420f689add..9d11de65daafc399659934876eaccee8947fb2ee 100644
--- a/drivers/gpu/drm/i915/i915_driver.h
+++ b/drivers/gpu/drm/i915/i915_driver.h
@@ -12,6 +12,11 @@ struct pci_dev;
 struct pci_device_id;
 struct drm_i915_private;
 
+#define DRIVER_NAME		"i915"
+#define DRIVER_DESC		"Intel Graphics"
+#define DRIVER_DATE		"20201103"
+#define DRIVER_TIMESTAMP	1604406085
+
 extern const struct dev_pm_ops i915_pm_ops;
 
 int i915_driver_probe(struct pci_dev *pdev, const struct pci_device_id *ent);
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index f7bdb973c880dab04142591b25c227a4e6d9c33c..f600d1cb01b34b598f9d0b6b8df80c1ad10902a5 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -31,38 +31,17 @@
 #define _I915_DRV_H_
 
 #include <uapi/drm/i915_drm.h>
-#include <uapi/drm/drm_fourcc.h>
 
 #include <asm/hypervisor.h>
 
-#include <linux/io-mapping.h>
 #include <linux/i2c.h>
 #include <linux/i2c-algo-bit.h>
-#include <linux/backlight.h>
-#include <linux/hash.h>
 #include <linux/intel-iommu.h>
-#include <linux/kref.h>
-#include <linux/mm_types.h>
-#include <linux/perf_event.h>
 #include <linux/pm_qos.h>
-#include <linux/dma-resv.h>
-#include <linux/shmem_fs.h>
-#include <linux/stackdepot.h>
-#include <linux/xarray.h>
-
-#include <drm/drm_gem.h>
-#include <drm/drm_auth.h>
-#include <drm/drm_cache.h>
-#include <drm/drm_util.h>
-#include <drm/drm_dsc.h>
-#include <drm/drm_atomic.h>
+
 #include <drm/drm_connector.h>
-#include <drm/i915_mei_hdcp_interface.h>
 #include <drm/ttm/ttm_device.h>
 
-#include "i915_params.h"
-#include "i915_utils.h"
-
 #include "display/intel_bios.h"
 #include "display/intel_cdclk.h"
 #include "display/intel_display.h"
@@ -77,9 +56,9 @@
 #include "display/intel_opregion.h"
 
 #include "gem/i915_gem_context_types.h"
+#include "gem/i915_gem_lmem.h"
 #include "gem/i915_gem_shrinker.h"
 #include "gem/i915_gem_stolen.h"
-#include "gem/i915_gem_lmem.h"
 
 #include "gt/intel_engine.h"
 #include "gt/intel_gt_types.h"
@@ -87,6 +66,12 @@
 #include "gt/intel_workarounds.h"
 #include "gt/uc/intel_uc.h"
 
+#include "i915_gem.h"
+#include "i915_gpu_error.h"
+#include "i915_params.h"
+#include "i915_perf_types.h"
+#include "i915_scheduler.h"
+#include "i915_utils.h"
 #include "intel_device_info.h"
 #include "intel_memory_region.h"
 #include "intel_pch.h"
@@ -94,28 +79,32 @@
 #include "intel_runtime_pm.h"
 #include "intel_step.h"
 #include "intel_uncore.h"
-#include "intel_wakeref.h"
 #include "intel_wopcm.h"
 
-#include "i915_gem.h"
-#include "i915_gem_gtt.h"
-#include "i915_gpu_error.h"
-#include "i915_perf_types.h"
-#include "i915_request.h"
-#include "i915_scheduler.h"
-#include "gt/intel_timeline.h"
-#include "i915_vma.h"
-
-
-/* General customization:
- */
-
-#define DRIVER_NAME		"i915"
-#define DRIVER_DESC		"Intel Graphics"
-#define DRIVER_DATE		"20201103"
-#define DRIVER_TIMESTAMP	1604406085
-
+struct dpll;
+struct drm_i915_clock_gating_funcs;
 struct drm_i915_gem_object;
+struct drm_i915_private;
+struct intel_atomic_state;
+struct intel_audio_funcs;
+struct intel_cdclk_config;
+struct intel_cdclk_funcs;
+struct intel_cdclk_state;
+struct intel_cdclk_vals;
+struct intel_color_funcs;
+struct intel_connector;
+struct intel_crtc;
+struct intel_dp;
+struct intel_dpll_funcs;
+struct intel_encoder;
+struct intel_fbdev;
+struct intel_fdi_funcs;
+struct intel_hotplug_funcs;
+struct intel_initial_plane_config;
+struct intel_limit;
+struct intel_overlay;
+struct intel_overlay_error_state;
+struct vlv_s0ix_state;
 
 /* Threshold == 5 for long IRQs, 50 for short */
 #define HPD_STORM_DEFAULT_THRESHOLD 50
@@ -166,117 +155,6 @@ struct i915_hotplug {
 	 I915_GEM_DOMAIN_INSTRUCTION | \
 	 I915_GEM_DOMAIN_VERTEX)
 
-struct drm_i915_private;
-
-struct drm_i915_file_private {
-	struct drm_i915_private *dev_priv;
-
-	union {
-		struct drm_file *file;
-		struct rcu_head rcu;
-	};
-
-	/** @proto_context_lock: Guards all struct i915_gem_proto_context
-	 * operations
-	 *
-	 * This not only guards @proto_context_xa, but is always held
-	 * whenever we manipulate any struct i915_gem_proto_context,
-	 * including finalizing it on first actual use of the GEM context.
-	 *
-	 * See i915_gem_proto_context.
-	 */
-	struct mutex proto_context_lock;
-
-	/** @proto_context_xa: xarray of struct i915_gem_proto_context
-	 *
-	 * Historically, the context uAPI allowed for two methods of
-	 * setting context parameters: SET_CONTEXT_PARAM and
-	 * CONTEXT_CREATE_EXT_SETPARAM.  The former is allowed to be called
-	 * at any time while the later happens as part of
-	 * GEM_CONTEXT_CREATE.  Everything settable via one was settable
-	 * via the other.  While some params are fairly simple and setting
-	 * them on a live context is harmless such as the context priority,
-	 * others are far trickier such as the VM or the set of engines.
-	 * In order to swap out the VM, for instance, we have to delay
-	 * until all current in-flight work is complete, swap in the new
-	 * VM, and then continue.  This leads to a plethora of potential
-	 * race conditions we'd really rather avoid.
-	 *
-	 * We have since disallowed setting these more complex parameters
-	 * on active contexts.  This works by delaying the creation of the
-	 * actual context until after the client is done configuring it
-	 * with SET_CONTEXT_PARAM.  From the perspective of the client, it
-	 * has the same u32 context ID the whole time.  From the
-	 * perspective of i915, however, it's a struct i915_gem_proto_context
-	 * right up until the point where we attempt to do something which
-	 * the proto-context can't handle.  Then the struct i915_gem_context
-	 * gets created.
-	 *
-	 * This is accomplished via a little xarray dance.  When
-	 * GEM_CONTEXT_CREATE is called, we create a struct
-	 * i915_gem_proto_context, reserve a slot in @context_xa but leave
-	 * it NULL, and place the proto-context in the corresponding slot
-	 * in @proto_context_xa.  Then, in i915_gem_context_lookup(), we
-	 * first check @context_xa.  If it's there, we return the struct
-	 * i915_gem_context and we're done.  If it's not, we look in
-	 * @proto_context_xa and, if we find it there, we create the actual
-	 * context and kill the proto-context.
-	 *
-	 * In order for this dance to work properly, everything which ever
-	 * touches a struct i915_gem_proto_context is guarded by
-	 * @proto_context_lock, including context creation.  Yes, this
-	 * means context creation now takes a giant global lock but it
-	 * can't really be helped and that should never be on any driver's
-	 * fast-path anyway.
-	 */
-	struct xarray proto_context_xa;
-
-	/** @context_xa: xarray of fully created i915_gem_context
-	 *
-	 * Write access to this xarray is guarded by @proto_context_lock.
-	 * Otherwise, writers may race with finalize_create_context_locked().
-	 *
-	 * See @proto_context_xa.
-	 */
-	struct xarray context_xa;
-	struct xarray vm_xa;
-
-	unsigned int bsd_engine;
-
-/*
- * Every context ban increments per client ban score. Also
- * hangs in short succession increments ban score. If ban threshold
- * is reached, client is considered banned and submitting more work
- * will fail. This is a stop gap measure to limit the badly behaving
- * clients access to gpu. Note that unbannable contexts never increment
- * the client ban score.
- */
-#define I915_CLIENT_SCORE_HANG_FAST	1
-#define   I915_CLIENT_FAST_HANG_JIFFIES (60 * HZ)
-#define I915_CLIENT_SCORE_CONTEXT_BAN   3
-#define I915_CLIENT_SCORE_BANNED	9
-	/** ban_score: Accumulated score of all ctx bans and fast hangs. */
-	atomic_t ban_score;
-	unsigned long hang_timestamp;
-};
-
-/* Interface history:
- *
- * 1.1: Original.
- * 1.2: Add Power Management
- * 1.3: Add vblank support
- * 1.4: Fix cmdbuffer path, add heap destroy
- * 1.5: Add vblank pipe configuration
- * 1.6: - New ioctl for scheduling buffer swaps on vertical blank
- *      - Support vertical blank on secondary display pipe
- */
-#define DRIVER_MAJOR		1
-#define DRIVER_MINOR		6
-#define DRIVER_PATCHLEVEL	0
-
-struct intel_overlay;
-struct intel_overlay_error_state;
-
 struct sdvo_device_mapping {
 	u8 initialized;
 	u8 dvo_port;
@@ -286,23 +164,6 @@ struct sdvo_device_mapping {
 	u8 ddc_pin;
 };
 
-struct intel_connector;
-struct intel_encoder;
-struct intel_atomic_state;
-struct intel_cdclk_config;
-struct intel_cdclk_funcs;
-struct intel_cdclk_state;
-struct intel_cdclk_vals;
-struct intel_initial_plane_config;
-struct intel_crtc;
-struct intel_limit;
-struct dpll;
-
-/* functions used internal in intel_pm.c */
-struct drm_i915_clock_gating_funcs {
-	void (*init_clock_gating)(struct drm_i915_private *dev_priv);
-};
-
 /* functions used for watermark calcs for display. */
 struct drm_i915_wm_disp_funcs {
 	/* update_wm is for legacy wm management */
@@ -320,38 +181,6 @@ struct drm_i915_wm_disp_funcs {
 	int (*compute_global_watermarks)(struct intel_atomic_state *state);
 };
 
-struct intel_color_funcs {
-	int (*color_check)(struct intel_crtc_state *crtc_state);
-	/*
-	 * Program double buffered color management registers during
-	 * vblank evasion. The registers should then latch during the
-	 * next vblank start, alongside any other double buffered registers
-	 * involved with the same commit.
-	 */
-	void (*color_commit)(const struct intel_crtc_state *crtc_state);
-	/*
-	 * Load LUTs (and other single buffered color management
-	 * registers). Will (hopefully) be called during the vblank
-	 * following the latching of any double buffered registers
-	 * involved with the same commit.
-	 */
-	void (*load_luts)(const struct intel_crtc_state *crtc_state);
-	void (*read_luts)(struct intel_crtc_state *crtc_state);
-};
-
-struct intel_hotplug_funcs {
-	void (*hpd_irq_setup)(struct drm_i915_private *dev_priv);
-};
-
-struct intel_fdi_funcs {
-	void (*fdi_link_train)(struct intel_crtc *crtc,
-			       const struct intel_crtc_state *crtc_state);
-};
-
-struct intel_dpll_funcs {
-	int (*crtc_compute_clock)(struct intel_crtc_state *crtc_state);
-};
-
 struct drm_i915_display_funcs {
 	/* Returns the active state of the crtc, and if the crtc is active,
 	 * fills out the pipe-config with the hw state. */
@@ -385,7 +214,6 @@ enum drrs_support_type {
 	SEAMLESS_DRRS_SUPPORT = 2
 };
 
-struct intel_dp;
 struct i915_drrs {
 	struct mutex mutex;
 	struct delayed_work work;
@@ -403,8 +231,6 @@ struct i915_drrs {
 #define QUIRK_INCREASE_DDI_DISABLED_TIME (1<<7)
 #define QUIRK_NO_PPS_BACKLIGHT_POWER_HOOK (1<<8)
 
-struct intel_fbdev;
-
 struct intel_gmbus {
 	struct i2c_adapter adapter;
 #define GMBUS_FORCE_BIT_RETRY (1U << 31)
@@ -423,8 +249,6 @@ struct i915_suspend_saved_registers {
 	u16 saveGCDGMBUS;
 };
 
-struct vlv_s0ix_state;
-
 #define MAX_L3_SLICES 2
 struct intel_l3_parity {
 	u32 *remap_info[MAX_L3_SLICES];
@@ -533,6 +357,9 @@ struct intel_vbt_data {
 	unsigned int bios_lvds_val; /* initial [PCH_]LVDS reg val in VBIOS */
 	enum drm_panel_orientation orientation;
 
+	bool override_afc_startup;
+	u8 override_afc_startup_val;
+
 	enum drrs_support_type drrs_type;
 
 	struct {
@@ -613,7 +440,6 @@ struct i915_selftest_stash {
 };
 
 /* intel_audio.c private */
-struct intel_audio_funcs;
 struct intel_audio_private {
 	/* Display internal audio functions */
 	const struct intel_audio_funcs *funcs;
@@ -815,8 +641,6 @@ struct drm_i915_private {
 	struct drm_atomic_state *modeset_restore_state;
 	struct drm_modeset_acquire_ctx reset_ctx;
 
-	struct i915_ggtt ggtt; /* VM representing the global address space */
-
 	struct i915_gem_mm mm;
 
 	/* Kernel Modesetting */
@@ -1263,6 +1087,8 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915,
 	IS_SUBPLATFORM(dev_priv, INTEL_DG2, INTEL_SUBPLATFORM_G10)
 #define IS_DG2_G11(dev_priv) \
 	IS_SUBPLATFORM(dev_priv, INTEL_DG2, INTEL_SUBPLATFORM_G11)
+#define IS_DG2_G12(dev_priv) \
+	IS_SUBPLATFORM(dev_priv, INTEL_DG2, INTEL_SUBPLATFORM_G12)
 #define IS_ADLS_RPLS(dev_priv) \
 	IS_SUBPLATFORM(dev_priv, INTEL_ALDERLAKE_S, INTEL_SUBPLATFORM_RPL_S)
 #define IS_ADLP_N(dev_priv) \
@@ -1379,16 +1205,17 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915,
 	(IS_XEHPSDV(__i915) && IS_GRAPHICS_STEP(__i915, since, until))
 
 /*
- * DG2 hardware steppings are a bit unusual.  The hardware design was forked
- * to create two variants (G10 and G11) which have distinct workaround sets.
- * The G11 fork of the DG2 design resets the GT stepping back to "A0" for its
- * first iteration, even though it's more similar to a G10 B0 stepping in terms
- * of functionality and workarounds.  However the display stepping does not
- * reset in the same manner --- a specific stepping like "B0" has a consistent
- * meaning regardless of whether it belongs to a G10 or G11 DG2.
+ * DG2 hardware steppings are a bit unusual.  The hardware design was forked to
+ * create three variants (G10, G11, and G12) which each have distinct
+ * workaround sets.  The G11 and G12 forks of the DG2 design reset the GT
+ * stepping back to "A0" for their first iterations, even though they're more
+ * similar to a G10 B0 stepping and G10 C0 stepping respectively in terms of
+ * functionality and workarounds.  However the display stepping does not reset
+ * in the same manner --- a specific stepping like "B0" has a consistent
+ * meaning regardless of whether it belongs to a G10, G11, or G12 DG2.
  *
  * TLDR:  All GT workarounds and stepping-specific logic must be applied in
- * relation to a specific subplatform (G10 or G11), whereas display workarounds
+ * relation to a specific subplatform (G10/G11/G12), whereas display workarounds
  * and stepping-specific logic will be applied with a general DG2-wide stepping
  * number.
  */
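
To make the stepping rule concrete, here is a hedged sketch (not part of this patch; apply_gt_wa() and apply_display_wa() are hypothetical helpers, and the IS_DG2_*_STEP() helpers are assumed from the surrounding header):

/*
 * Sketch only: a GT workaround for the "G10 B0" hardware level must be
 * keyed per subplatform, because G11 and G12 restart at A0 (G12's A0
 * already corresponds to the G10 C0 level). The display check needs no
 * subplatform qualifier.
 */
static void dg2_workaround_sketch(struct drm_i915_private *i915)
{
	if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_FOREVER) ||
	    IS_DG2_GRAPHICS_STEP(i915, G11, STEP_A0, STEP_FOREVER) ||
	    IS_DG2_GRAPHICS_STEP(i915, G12, STEP_A0, STEP_FOREVER))
		apply_gt_wa(i915);	/* hypothetical */

	if (IS_DG2_DISPLAY_STEP(i915, STEP_B0, STEP_FOREVER))
		apply_display_wa(i915);	/* hypothetical */
}
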
@@ -1558,6 +1385,9 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915,
 #define INTEL_DISPLAY_ENABLED(dev_priv) \
 	(drm_WARN_ON(&(dev_priv)->drm, !HAS_DISPLAY(dev_priv)), !(dev_priv)->params.disable_display)
 
+#define HAS_GUC_DEPRIVILEGE(dev_priv) \
+	(INTEL_INFO(dev_priv)->has_guc_deprivilege)
+
 static inline bool run_as_guest(void)
 {
 	return !hypervisor_is_type(X86_HYPER_NATIVE);
@@ -1654,79 +1484,29 @@ int i915_gem_object_unbind(struct drm_i915_gem_object *obj,
 #define I915_GEM_OBJECT_UNBIND_BARRIER BIT(1)
 #define I915_GEM_OBJECT_UNBIND_TEST BIT(2)
 #define I915_GEM_OBJECT_UNBIND_VM_TRYLOCK BIT(3)
+#define I915_GEM_OBJECT_UNBIND_ASYNC BIT(4)
 
 void i915_gem_runtime_suspend(struct drm_i915_private *dev_priv);
 
-int i915_gem_dumb_create(struct drm_file *file_priv,
-			 struct drm_device *dev,
-			 struct drm_mode_create_dumb *args);
-
 int __must_check i915_gem_set_global_seqno(struct drm_device *dev, u32 seqno);
 
-static inline u32 i915_reset_count(struct i915_gpu_error *error)
-{
-	return atomic_read(&error->reset_count);
-}
-
-static inline u32 i915_reset_engine_count(struct i915_gpu_error *error,
-					  const struct intel_engine_cs *engine)
-{
-	return atomic_read(&error->reset_engine_count[engine->uabi_class]);
-}
-
 int __must_check i915_gem_init(struct drm_i915_private *dev_priv);
 void i915_gem_driver_register(struct drm_i915_private *i915);
 void i915_gem_driver_unregister(struct drm_i915_private *i915);
 void i915_gem_driver_remove(struct drm_i915_private *dev_priv);
 void i915_gem_driver_release(struct drm_i915_private *dev_priv);
-void i915_gem_suspend(struct drm_i915_private *dev_priv);
-void i915_gem_suspend_late(struct drm_i915_private *dev_priv);
-void i915_gem_resume(struct drm_i915_private *dev_priv);
 
 int i915_gem_open(struct drm_i915_private *i915, struct drm_file *file);
 
-int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
-				    enum i915_cache_level cache_level);
-
-struct drm_gem_object *i915_gem_prime_import(struct drm_device *dev,
-				struct dma_buf *dma_buf);
-
-struct dma_buf *i915_gem_prime_export(struct drm_gem_object *gem_obj, int flags);
-
-static inline struct i915_address_space *
-i915_gem_vm_lookup(struct drm_i915_file_private *file_priv, u32 id)
-{
-	struct i915_address_space *vm;
-
-	xa_lock(&file_priv->vm_xa);
-	vm = xa_load(&file_priv->vm_xa, id);
-	if (vm)
-		kref_get(&vm->ref);
-	xa_unlock(&file_priv->vm_xa);
-
-	return vm;
-}
-
-/* i915_gem_internal.c */
-struct drm_i915_gem_object *
-i915_gem_object_create_internal(struct drm_i915_private *dev_priv,
-				phys_addr_t size);
-struct drm_i915_gem_object *
-__i915_gem_object_create_internal(struct drm_i915_private *dev_priv,
-				  const struct drm_i915_gem_object_ops *ops,
-				  phys_addr_t size);
-
 /* i915_gem_tiling.c */
 static inline bool i915_gem_object_needs_bit17_swizzle(struct drm_i915_gem_object *obj)
 {
 	struct drm_i915_private *i915 = to_i915(obj->base.dev);
 
-	return i915->ggtt.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_9_10_17 &&
+	return to_gt(i915)->ggtt->bit_6_swizzle_x == I915_BIT_6_SWIZZLE_9_10_17 &&
 		i915_gem_object_is_tiled(obj);
 }
 
-const char *i915_cache_level_str(struct drm_i915_private *i915, int type);
-
 /* intel_device_info.c */
 static inline struct intel_device_info *
 mkwrite_device_info(struct drm_i915_private *dev_priv)
@@ -1734,14 +1514,6 @@ mkwrite_device_info(struct drm_i915_private *dev_priv)
 	return (struct intel_device_info *)INTEL_INFO(dev_priv);
 }
 
-static inline int intel_hws_csb_write_index(struct drm_i915_private *i915)
-{
-	if (GRAPHICS_VER(i915) >= 11)
-		return ICL_HWS_CSB_WRITE_INDEX;
-	else
-		return I915_HWS_CSB_WRITE_INDEX;
-}
-
 static inline enum i915_map_type
 i915_coherent_map_type(struct drm_i915_private *i915,
 		       struct drm_i915_gem_object *obj, bool always_coherent)
diff --git a/drivers/gpu/drm/i915/i915_file_private.h b/drivers/gpu/drm/i915/i915_file_private.h
new file mode 100644
index 0000000000000000000000000000000000000000..fb16cc431b2a582742f2a335b1e4cbec755b92e5
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_file_private.h
@@ -0,0 +1,108 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#ifndef __I915_FILE_PRIVATE_H__
+#define __I915_FILE_PRIVATE_H__
+
+#include <linux/mutex.h>
+#include <linux/types.h>
+#include <linux/xarray.h>
+
+struct drm_i915_private;
+struct drm_file;
+
+struct drm_i915_file_private {
+	struct drm_i915_private *dev_priv;
+
+	union {
+		struct drm_file *file;
+		struct rcu_head rcu;
+	};
+
+	/** @proto_context_lock: Guards all struct i915_gem_proto_context
+	 * operations
+	 *
+	 * This not only guards @proto_context_xa, but is always held
+	 * whenever we manipulate any struct i915_gem_proto_context,
+	 * including finalizing it on first actual use of the GEM context.
+	 *
+	 * See i915_gem_proto_context.
+	 */
+	struct mutex proto_context_lock;
+
+	/** @proto_context_xa: xarray of struct i915_gem_proto_context
+	 *
+	 * Historically, the context uAPI allowed for two methods of
+	 * setting context parameters: SET_CONTEXT_PARAM and
+	 * CONTEXT_CREATE_EXT_SETPARAM.  The former is allowed to be called
+	 * at any time while the latter happens as part of
+	 * GEM_CONTEXT_CREATE.  Everything settable via one was settable
+	 * via the other.  While some params, such as the context priority,
+	 * are fairly simple and harmless to set on a live context, others,
+	 * such as the VM or the set of engines, are far trickier.
+	 * In order to swap out the VM, for instance, we have to delay
+	 * until all current in-flight work is complete, swap in the new
+	 * VM, and then continue.  This leads to a plethora of potential
+	 * race conditions we'd really rather avoid.
+	 *
+	 * We have since disallowed setting these more complex parameters
+	 * on active contexts.  This works by delaying the creation of the
+	 * actual context until after the client is done configuring it
+	 * with SET_CONTEXT_PARAM.  From the perspective of the client, it
+	 * has the same u32 context ID the whole time.  From the
+	 * perspective of i915, however, it's a struct i915_gem_proto_context
+	 * right up until the point where we attempt to do something which
+	 * the proto-context can't handle.  Then the struct i915_gem_context
+	 * gets created.
+	 *
+	 * This is accomplished via a little xarray dance.  When
+	 * GEM_CONTEXT_CREATE is called, we create a struct
+	 * i915_gem_proto_context, reserve a slot in @context_xa but leave
+	 * it NULL, and place the proto-context in the corresponding slot
+	 * in @proto_context_xa.  Then, in i915_gem_context_lookup(), we
+	 * first check @context_xa.  If it's there, we return the struct
+	 * i915_gem_context and we're done.  If it's not, we look in
+	 * @proto_context_xa and, if we find it there, we create the actual
+	 * context and kill the proto-context.
+	 *
+	 * In order for this dance to work properly, everything that ever
+	 * touches a struct i915_gem_proto_context is guarded by
+	 * @proto_context_lock, including context creation.  Yes, this
+	 * means context creation now takes a giant global lock, but it
+	 * can't really be helped and that should never be on any driver's
+	 * fast-path anyway.
+	 */
+	struct xarray proto_context_xa;
+
+	/** @context_xa: xarray of fully created i915_gem_context
+	 *
+	 * Write access to this xarray is guarded by @proto_context_lock.
+	 * Otherwise, writers may race with finalize_create_context_locked().
+	 *
+	 * See @proto_context_xa.
+	 */
+	struct xarray context_xa;
+	struct xarray vm_xa;
+
+	unsigned int bsd_engine;
+
+/*
+ * Every context ban increments the per-client ban score. Hangs in
+ * short succession also increment the ban score. If the ban threshold
+ * is reached, the client is considered banned and submitting more work
+ * will fail. This is a stopgap measure to limit a badly behaving
+ * client's access to the GPU. Note that unbannable contexts never
+ * increment the client ban score.
+ */
+#define I915_CLIENT_SCORE_HANG_FAST	1
+#define   I915_CLIENT_FAST_HANG_JIFFIES (60 * HZ)
+#define I915_CLIENT_SCORE_CONTEXT_BAN   3
+#define I915_CLIENT_SCORE_BANNED	9
+	/** @ban_score: Accumulated score of all ctx bans and fast hangs. */
+	atomic_t ban_score;
+	unsigned long hang_timestamp;
+};
+
+#endif /* __I915_FILE_PRIVATE_H__ */
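
The lookup half of this dance, as a hedged sketch (not part of this patch; the real i915_gem_context_lookup() also handles reference counting, and the exact finalize_create_context_locked() signature is assumed here):

/*
 * Sketch only: the fast path hits @context_xa; the slow path re-checks
 * and finalizes the proto-context under the global @proto_context_lock.
 */
static struct i915_gem_context *
context_lookup_sketch(struct drm_i915_file_private *fpriv, u32 id)
{
	struct i915_gem_context *ctx;

	ctx = xa_load(&fpriv->context_xa, id);
	if (ctx)
		return ctx;

	mutex_lock(&fpriv->proto_context_lock);
	ctx = xa_load(&fpriv->context_xa, id);	/* re-check under the lock */
	if (!ctx) {
		struct i915_gem_proto_context *pc =
			xa_load(&fpriv->proto_context_xa, id);

		if (pc)	/* assumed signature of the finalize helper */
			ctx = finalize_create_context_locked(fpriv, pc, id);
	}
	mutex_unlock(&fpriv->proto_context_lock);

	return ctx;
}
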
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 5ef959a9f594ed8904ee55bb6a03076f21a900dc..2e10187cd0a06f591f78c7df5324186e983d9797 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -25,7 +25,6 @@
  *
  */
 
-#include <drm/drm_vma_manager.h>
 #include <linux/dma-fence-array.h>
 #include <linux/kthread.h>
 #include <linux/dma-resv.h>
@@ -37,6 +36,9 @@
 #include <linux/dma-buf.h>
 #include <linux/mman.h>
 
+#include <drm/drm_cache.h>
+#include <drm/drm_vma_manager.h>
+
 #include "display/intel_display.h"
 #include "display/intel_frontbuffer.h"
 
@@ -44,6 +46,7 @@
 #include "gem/i915_gem_context.h"
 #include "gem/i915_gem_ioctls.h"
 #include "gem/i915_gem_mman.h"
+#include "gem/i915_gem_pm.h"
 #include "gem/i915_gem_region.h"
 #include "gem/i915_gem_userptr.h"
 #include "gt/intel_engine_user.h"
@@ -52,9 +55,9 @@
 #include "gt/intel_workarounds.h"
 
 #include "i915_drv.h"
+#include "i915_file_private.h"
 #include "i915_trace.h"
 #include "i915_vgpu.h"
-
 #include "intel_pm.h"
 
 static int
@@ -89,7 +92,8 @@ int
 i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
 			    struct drm_file *file)
 {
-	struct i915_ggtt *ggtt = &to_i915(dev)->ggtt;
+	struct drm_i915_private *i915 = to_i915(dev);
+	struct i915_ggtt *ggtt = to_gt(i915)->ggtt;
 	struct drm_i915_gem_get_aperture *args = data;
 	struct i915_vma *vma;
 	u64 pinned;
@@ -119,6 +123,8 @@ int i915_gem_object_unbind(struct drm_i915_gem_object *obj,
 	struct i915_vma *vma;
 	int ret;
 
+	assert_object_held(obj);
+
 	if (list_empty(&obj->vma.list))
 		return 0;
 
@@ -156,10 +162,16 @@ int i915_gem_object_unbind(struct drm_i915_gem_object *obj,
 		spin_unlock(&obj->vma.lock);
 
 		if (vma) {
+			bool vm_trylock = !!(flags & I915_GEM_OBJECT_UNBIND_VM_TRYLOCK);
+
 			ret = -EBUSY;
-			if (flags & I915_GEM_OBJECT_UNBIND_ACTIVE ||
-			    !i915_vma_is_active(vma)) {
-				if (flags & I915_GEM_OBJECT_UNBIND_VM_TRYLOCK) {
+			if (flags & I915_GEM_OBJECT_UNBIND_ASYNC) {
+				assert_object_held(vma->obj);
+				ret = i915_vma_unbind_async(vma, vm_trylock);
+			}
+
+			if (ret == -EBUSY && (flags & I915_GEM_OBJECT_UNBIND_ACTIVE ||
+					      !i915_vma_is_active(vma))) {
+				if (vm_trylock) {
 					if (mutex_trylock(&vma->vm->mutex)) {
 						ret = __i915_vma_unbind(vma);
 						mutex_unlock(&vma->vm->mutex);
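
A hedged caller-side sketch of the new flag (not part of this patch): the async path requires the object lock to already be held, and -EBUSY from it falls through to the synchronous unbind above.

static int unbind_async_sketch(struct drm_i915_gem_object *obj)
{
	/* Sketch only: the caller must hold the object lock for the async path. */
	assert_object_held(obj);

	return i915_gem_object_unbind(obj,
				      I915_GEM_OBJECT_UNBIND_ASYNC |
				      I915_GEM_OBJECT_UNBIND_VM_TRYLOCK);
}
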
@@ -290,7 +302,7 @@ static struct i915_vma *i915_gem_gtt_prepare(struct drm_i915_gem_object *obj,
 					     bool write)
 {
 	struct drm_i915_private *i915 = to_i915(obj->base.dev);
-	struct i915_ggtt *ggtt = &i915->ggtt;
+	struct i915_ggtt *ggtt = to_gt(i915)->ggtt;
 	struct i915_vma *vma;
 	struct i915_gem_ww_ctx ww;
 	int ret;
@@ -351,7 +363,7 @@ static void i915_gem_gtt_cleanup(struct drm_i915_gem_object *obj,
 				 struct i915_vma *vma)
 {
 	struct drm_i915_private *i915 = to_i915(obj->base.dev);
-	struct i915_ggtt *ggtt = &i915->ggtt;
+	struct i915_ggtt *ggtt = to_gt(i915)->ggtt;
 
 	i915_gem_object_unpin_pages(obj);
 	if (drm_mm_node_allocated(node)) {
@@ -367,7 +379,7 @@ i915_gem_gtt_pread(struct drm_i915_gem_object *obj,
 		   const struct drm_i915_gem_pread *args)
 {
 	struct drm_i915_private *i915 = to_i915(obj->base.dev);
-	struct i915_ggtt *ggtt = &i915->ggtt;
+	struct i915_ggtt *ggtt = to_gt(i915)->ggtt;
 	intel_wakeref_t wakeref;
 	struct drm_mm_node node;
 	void __user *user_data;
@@ -523,7 +535,7 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj,
 			 const struct drm_i915_gem_pwrite *args)
 {
 	struct drm_i915_private *i915 = to_i915(obj->base.dev);
-	struct i915_ggtt *ggtt = &i915->ggtt;
+	struct i915_ggtt *ggtt = to_gt(i915)->ggtt;
 	struct intel_runtime_pm *rpm = &i915->runtime_pm;
 	intel_wakeref_t wakeref;
 	struct drm_mm_node node;
@@ -824,7 +836,7 @@ void i915_gem_runtime_suspend(struct drm_i915_private *i915)
 	 */
 
 	list_for_each_entry_safe(obj, on,
-				 &i915->ggtt.userfault_list, userfault_link)
+				 &to_gt(i915)->ggtt->userfault_list, userfault_link)
 		__i915_gem_object_release_mmap_gtt(obj);
 
 	/*
@@ -832,8 +844,8 @@ void i915_gem_runtime_suspend(struct drm_i915_private *i915)
 	 * in use by hardware (i.e. they are pinned), we should not be powering
 	 * down! All other fences will be reacquired by the user upon waking.
 	 */
-	for (i = 0; i < i915->ggtt.num_fences; i++) {
-		struct i915_fence_reg *reg = &i915->ggtt.fence_regs[i];
+	for (i = 0; i < to_gt(i915)->ggtt->num_fences; i++) {
+		struct i915_fence_reg *reg = &to_gt(i915)->ggtt->fence_regs[i];
 
 		/*
 		 * Ideally we want to assert that the fence register is not
@@ -874,7 +886,7 @@ i915_gem_object_ggtt_pin_ww(struct drm_i915_gem_object *obj,
 			    u64 size, u64 alignment, u64 flags)
 {
 	struct drm_i915_private *i915 = to_i915(obj->base.dev);
-	struct i915_ggtt *ggtt = &i915->ggtt;
+	struct i915_ggtt *ggtt = to_gt(i915)->ggtt;
 	struct i915_vma *vma;
 	int ret;
 
@@ -1124,7 +1136,7 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
 
 		/* Minimal basic recovery for KMS */
 		ret = i915_ggtt_enable_hw(dev_priv);
-		i915_ggtt_resume(&dev_priv->ggtt);
+		i915_ggtt_resume(to_gt(dev_priv)->ggtt);
 		intel_init_clock_gating(dev_priv);
 	}
 
@@ -1147,7 +1159,7 @@ void i915_gem_driver_unregister(struct drm_i915_private *i915)
 
 void i915_gem_driver_remove(struct drm_i915_private *dev_priv)
 {
-	intel_wakeref_auto_fini(&dev_priv->ggtt.userfault_wakeref);
+	intel_wakeref_auto_fini(&to_gt(dev_priv)->ggtt->userfault_wakeref);
 
 	i915_gem_suspend_late(dev_priv);
 	intel_gt_driver_remove(to_gt(dev_priv));
diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c
index 24eee0c2055f3b491530b3e4e0c786d56e5ff8f6..f025ee4fa52618cb280864b322b84f5cd8ae2c1f 100644
--- a/drivers/gpu/drm/i915/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/i915_gem_evict.c
@@ -38,6 +38,11 @@ I915_SELFTEST_DECLARE(static struct igt_evict_ctl {
 	bool fail_if_busy:1;
 } igt_evict_ctl;)
 
+static bool dying_vma(struct i915_vma *vma)
+{
+	return !kref_read(&vma->obj->base.refcount);
+}
+
 static int ggtt_flush(struct intel_gt *gt)
 {
 	/*
@@ -50,8 +55,37 @@ static int ggtt_flush(struct intel_gt *gt)
 	return intel_gt_wait_for_idle(gt, MAX_SCHEDULE_TIMEOUT);
 }
 
+static bool grab_vma(struct i915_vma *vma, struct i915_gem_ww_ctx *ww)
+{
+	/*
+	 * We add the extra refcount so the object doesn't drop to zero until
+	 * after ungrab_vma(), this way trylock is always paired with unlock.
+	 */
+	if (i915_gem_object_get_rcu(vma->obj)) {
+		if (!i915_gem_object_trylock(vma->obj, ww)) {
+			i915_gem_object_put(vma->obj);
+			return false;
+		}
+	} else {
+		/* Dead objects don't need pins */
+		atomic_and(~I915_VMA_PIN_MASK, &vma->flags);
+	}
+
+	return true;
+}
+
+static void ungrab_vma(struct i915_vma *vma)
+{
+	if (dying_vma(vma))
+		return;
+
+	i915_gem_object_unlock(vma->obj);
+	i915_gem_object_put(vma->obj);
+}
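
A hedged pairing sketch (not part of this patch): grab_vma() brackets any window in which vma->obj is dereferenced, so the trylock always has a matching unlock and a dying object is merely unpinned; the real callers also hold vm->mutex around __i915_vma_unbind().

static int evict_one_sketch(struct i915_vma *vma, struct i915_gem_ww_ctx *ww)
{
	int err = -ENOSPC;

	if (grab_vma(vma, ww)) {
		/* Sketch only: vm->mutex is assumed held by the caller. */
		err = __i915_vma_unbind(vma);
		ungrab_vma(vma);
	}

	return err;
}
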
+
 static bool
 mark_free(struct drm_mm_scan *scan,
+	  struct i915_gem_ww_ctx *ww,
 	  struct i915_vma *vma,
 	  unsigned int flags,
 	  struct list_head *unwind)
@@ -59,6 +93,9 @@ mark_free(struct drm_mm_scan *scan,
 	if (i915_vma_is_pinned(vma))
 		return false;
 
+	if (!grab_vma(vma, ww))
+		return false;
+
 	list_add(&vma->evict_link, unwind);
 	return drm_mm_scan_add_block(scan, &vma->node);
 }
@@ -77,6 +114,7 @@ static bool defer_evict(struct i915_vma *vma)
 /**
  * i915_gem_evict_something - Evict vmas to make room for binding a new one
  * @vm: address space to evict from
+ * @ww: An optional struct i915_gem_ww_ctx.
  * @min_size: size of the desired free space
  * @alignment: alignment constraint of the desired free space
  * @color: color for the desired space
@@ -99,6 +137,7 @@ static bool defer_evict(struct i915_vma *vma)
  */
 int
 i915_gem_evict_something(struct i915_address_space *vm,
+			 struct i915_gem_ww_ctx *ww,
 			 u64 min_size, u64 alignment,
 			 unsigned long color,
 			 u64 start, u64 end,
@@ -171,7 +210,7 @@ i915_gem_evict_something(struct i915_address_space *vm,
 			continue;
 		}
 
-		if (mark_free(&scan, vma, flags, &eviction_list))
+		if (mark_free(&scan, ww, vma, flags, &eviction_list))
 			goto found;
 	}
 
@@ -179,6 +218,7 @@ i915_gem_evict_something(struct i915_address_space *vm,
 	list_for_each_entry_safe(vma, next, &eviction_list, evict_link) {
 		ret = drm_mm_scan_remove_block(&scan, &vma->node);
 		BUG_ON(ret);
+		ungrab_vma(vma);
 	}
 
 	/*
@@ -223,10 +263,12 @@ i915_gem_evict_something(struct i915_address_space *vm,
 	 * of any of our objects, thus corrupting the list).
 	 */
 	list_for_each_entry_safe(vma, next, &eviction_list, evict_link) {
-		if (drm_mm_scan_remove_block(&scan, &vma->node))
+		if (drm_mm_scan_remove_block(&scan, &vma->node)) {
 			__i915_vma_pin(vma);
-		else
+		} else {
 			list_del(&vma->evict_link);
+			ungrab_vma(vma);
+		}
 	}
 
 	/* Unbinding will emit any required flushes */
@@ -235,16 +277,20 @@ i915_gem_evict_something(struct i915_address_space *vm,
 		__i915_vma_unpin(vma);
 		if (ret == 0)
 			ret = __i915_vma_unbind(vma);
+		ungrab_vma(vma);
 	}
 
 	while (ret == 0 && (node = drm_mm_scan_color_evict(&scan))) {
 		vma = container_of(node, struct i915_vma, node);
 
 		/* If we find any non-objects (!vma), we cannot evict them */
-		if (vma->node.color != I915_COLOR_UNEVICTABLE)
+		if (vma->node.color != I915_COLOR_UNEVICTABLE &&
+		    grab_vma(vma, ww)) {
 			ret = __i915_vma_unbind(vma);
-		else
-			ret = -ENOSPC; /* XXX search failed, try again? */
+			ungrab_vma(vma);
+		} else {
+			ret = -ENOSPC;
+		}
 	}
 
 	return ret;
@@ -253,6 +299,7 @@ i915_gem_evict_something(struct i915_address_space *vm,
 /**
  * i915_gem_evict_for_node - Evict vmas to make room for binding a new one
  * @vm: address space to evict from
+ * @ww: An optional struct i915_gem_ww_ctx.
  * @target: range (and color) to evict for
  * @flags: additional flags to control the eviction algorithm
  *
@@ -262,6 +309,7 @@ i915_gem_evict_something(struct i915_address_space *vm,
  * memory in e.g. the shrinker.
  */
 int i915_gem_evict_for_node(struct i915_address_space *vm,
+			    struct i915_gem_ww_ctx *ww,
 			    struct drm_mm_node *target,
 			    unsigned int flags)
 {
@@ -334,6 +382,11 @@ int i915_gem_evict_for_node(struct i915_address_space *vm,
 			break;
 		}
 
+		if (!grab_vma(vma, ww)) {
+			ret = -ENOSPC;
+			break;
+		}
+
 		/*
 		 * Never show fear in the face of dragons!
 		 *
@@ -351,6 +404,8 @@ int i915_gem_evict_for_node(struct i915_address_space *vm,
 		__i915_vma_unpin(vma);
 		if (ret == 0)
 			ret = __i915_vma_unbind(vma);
+
+		ungrab_vma(vma);
 	}
 
 	return ret;
@@ -359,6 +414,8 @@ int i915_gem_evict_for_node(struct i915_address_space *vm,
 /**
  * i915_gem_evict_vm - Evict all idle vmas from a vm
  * @vm: Address space to cleanse
+ * @ww: An optional struct i915_gem_ww_ctx. If not NULL, i915_gem_evict_vm
+ * will be able to evict vmas locked by the ww context as well.
  *
  * This function evicts all vmas from a vm.
  *
@@ -368,7 +425,7 @@ int i915_gem_evict_for_node(struct i915_address_space *vm,
  * To clarify: This is for freeing up virtual address space, not for freeing
  * memory in e.g. the shrinker.
  */
-int i915_gem_evict_vm(struct i915_address_space *vm)
+int i915_gem_evict_vm(struct i915_address_space *vm, struct i915_gem_ww_ctx *ww)
 {
 	int ret = 0;
 
@@ -389,24 +446,52 @@ int i915_gem_evict_vm(struct i915_address_space *vm)
 	do {
 		struct i915_vma *vma, *vn;
 		LIST_HEAD(eviction_list);
+		LIST_HEAD(locked_eviction_list);
 
 		list_for_each_entry(vma, &vm->bound_list, vm_link) {
 			if (i915_vma_is_pinned(vma))
 				continue;
 
+			/*
+			 * If we already own the lock, trylock fails. In case
+			 * the resv is shared among multiple objects, we still
+			 * need the object ref.
+			 */
+			if (dying_vma(vma) ||
+			    (ww && (dma_resv_locking_ctx(vma->obj->base.resv) == &ww->ctx))) {
+				__i915_vma_pin(vma);
+				list_add(&vma->evict_link, &locked_eviction_list);
+				continue;
+			}
+
+			if (!i915_gem_object_trylock(vma->obj, ww))
+				continue;
+
 			__i915_vma_pin(vma);
 			list_add(&vma->evict_link, &eviction_list);
 		}
-		if (list_empty(&eviction_list))
+		if (list_empty(&eviction_list) && list_empty(&locked_eviction_list))
 			break;
 
 		ret = 0;
+		/* Unbind locked objects first, before unlocking the eviction_list */
+		list_for_each_entry_safe(vma, vn, &locked_eviction_list, evict_link) {
+			__i915_vma_unpin(vma);
+
+			if (ret == 0)
+				ret = __i915_vma_unbind(vma);
+			if (ret != -EINTR) /* "Get me out of here!" */
+				ret = 0;
+		}
+
 		list_for_each_entry_safe(vma, vn, &eviction_list, evict_link) {
 			__i915_vma_unpin(vma);
 			if (ret == 0)
 				ret = __i915_vma_unbind(vma);
 			if (ret != -EINTR) /* "Get me out of here!" */
 				ret = 0;
+
+			i915_gem_object_unlock(vma->obj);
 		}
 	} while (ret == 0);
 
diff --git a/drivers/gpu/drm/i915/i915_gem_evict.h b/drivers/gpu/drm/i915/i915_gem_evict.h
index d4478b6ad11bb869f140d12ad440cd82453f8edc..e593c530f9bd7ac3be83a89733ff9e24ad471a47 100644
--- a/drivers/gpu/drm/i915/i915_gem_evict.h
+++ b/drivers/gpu/drm/i915/i915_gem_evict.h
@@ -10,15 +10,19 @@
 
 struct drm_mm_node;
 struct i915_address_space;
+struct i915_gem_ww_ctx;
 
 int __must_check i915_gem_evict_something(struct i915_address_space *vm,
+					  struct i915_gem_ww_ctx *ww,
 					  u64 min_size, u64 alignment,
 					  unsigned long color,
 					  u64 start, u64 end,
 					  unsigned flags);
 int __must_check i915_gem_evict_for_node(struct i915_address_space *vm,
+					 struct i915_gem_ww_ctx *ww,
 					 struct drm_mm_node *node,
 					 unsigned int flags);
-int i915_gem_evict_vm(struct i915_address_space *vm);
+int i915_gem_evict_vm(struct i915_address_space *vm,
+		      struct i915_gem_ww_ctx *ww);
 
 #endif /* __I915_GEM_EVICT_H__ */
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 8a7f0d92b56fd15d1818642bd3134f4bcd2a4c3a..329ff75b80b97d8cc33b8f2e320d3f7904bb4278 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -57,7 +57,7 @@ void i915_gem_gtt_finish_pages(struct drm_i915_gem_object *obj,
 			       struct sg_table *pages)
 {
 	struct drm_i915_private *i915 = to_i915(obj->base.dev);
-	struct i915_ggtt *ggtt = &i915->ggtt;
+	struct i915_ggtt *ggtt = to_gt(i915)->ggtt;
 
 	/* XXX This does not prevent more requests being submitted! */
 	if (unlikely(ggtt->do_idle_maps))
@@ -71,6 +71,7 @@ void i915_gem_gtt_finish_pages(struct drm_i915_gem_object *obj,
 /**
  * i915_gem_gtt_reserve - reserve a node in an address_space (GTT)
  * @vm: the &struct i915_address_space
+ * @ww: An optional struct i915_gem_ww_ctx.
  * @node: the &struct drm_mm_node (typically i915_vma.node)
  * @size: how much space to allocate inside the GTT,
  *        must be #I915_GTT_PAGE_SIZE aligned
@@ -94,6 +95,7 @@ void i915_gem_gtt_finish_pages(struct drm_i915_gem_object *obj,
  * asked to wait for eviction and interrupted.
  */
 int i915_gem_gtt_reserve(struct i915_address_space *vm,
+			 struct i915_gem_ww_ctx *ww,
 			 struct drm_mm_node *node,
 			 u64 size, u64 offset, unsigned long color,
 			 unsigned int flags)
@@ -104,7 +106,7 @@ int i915_gem_gtt_reserve(struct i915_address_space *vm,
 	GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE));
 	GEM_BUG_ON(!IS_ALIGNED(offset, I915_GTT_MIN_ALIGNMENT));
 	GEM_BUG_ON(range_overflows(offset, size, vm->total));
-	GEM_BUG_ON(vm == &vm->i915->ggtt.alias->vm);
+	GEM_BUG_ON(vm == &to_gt(vm->i915)->ggtt->alias->vm);
 	GEM_BUG_ON(drm_mm_node_allocated(node));
 
 	node->size = size;
@@ -118,7 +120,7 @@ int i915_gem_gtt_reserve(struct i915_address_space *vm,
 	if (flags & PIN_NOEVICT)
 		return -ENOSPC;
 
-	err = i915_gem_evict_for_node(vm, node, flags);
+	err = i915_gem_evict_for_node(vm, ww, node, flags);
 	if (err == 0)
 		err = drm_mm_reserve_node(&vm->mm, node);
 
@@ -153,6 +155,7 @@ static u64 random_offset(u64 start, u64 end, u64 len, u64 align)
 /**
  * i915_gem_gtt_insert - insert a node into an address_space (GTT)
  * @vm: the &struct i915_address_space
+ * @ww: An optional struct i915_gem_ww_ctx.
  * @node: the &struct drm_mm_node (typically i915_vma.node)
  * @size: how much space to allocate inside the GTT,
  *        must be #I915_GTT_PAGE_SIZE aligned
@@ -185,6 +188,7 @@ static u64 random_offset(u64 start, u64 end, u64 len, u64 align)
  * asked to wait for eviction and interrupted.
  */
 int i915_gem_gtt_insert(struct i915_address_space *vm,
+			struct i915_gem_ww_ctx *ww,
 			struct drm_mm_node *node,
 			u64 size, u64 alignment, unsigned long color,
 			u64 start, u64 end, unsigned int flags)
@@ -202,7 +206,7 @@ int i915_gem_gtt_insert(struct i915_address_space *vm,
 	GEM_BUG_ON(start >= end);
 	GEM_BUG_ON(start > 0  && !IS_ALIGNED(start, I915_GTT_PAGE_SIZE));
 	GEM_BUG_ON(end < U64_MAX && !IS_ALIGNED(end, I915_GTT_PAGE_SIZE));
-	GEM_BUG_ON(vm == &vm->i915->ggtt.alias->vm);
+	GEM_BUG_ON(vm == &to_gt(vm->i915)->ggtt->alias->vm);
 	GEM_BUG_ON(drm_mm_node_allocated(node));
 
 	if (unlikely(range_overflows(start, size, end)))
@@ -270,7 +274,7 @@ int i915_gem_gtt_insert(struct i915_address_space *vm,
 	 */
 	offset = random_offset(start, end,
 			       size, alignment ?: I915_GTT_MIN_ALIGNMENT);
-	err = i915_gem_gtt_reserve(vm, node, size, offset, color, flags);
+	err = i915_gem_gtt_reserve(vm, ww, node, size, offset, color, flags);
 	if (err != -ENOSPC)
 		return err;
 
@@ -278,7 +282,7 @@ int i915_gem_gtt_insert(struct i915_address_space *vm,
 		return -ENOSPC;
 
 	/* Randomly selected placement is pinned, do a search */
-	err = i915_gem_evict_something(vm, size, alignment, color,
+	err = i915_gem_evict_something(vm, ww, size, alignment, color,
 				       start, end, flags);
 	if (err)
 		return err;
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index c9b0ee5e1d237abf8f47ccd3c632d3f5c86a6908..8c2f57eb5ddaa19c1aa8cf30c78ac16610edf50e 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -16,6 +16,7 @@
 
 struct drm_i915_gem_object;
 struct i915_address_space;
+struct i915_gem_ww_ctx;
 
 int __must_check i915_gem_gtt_prepare_pages(struct drm_i915_gem_object *obj,
 					    struct sg_table *pages);
@@ -23,11 +24,13 @@ void i915_gem_gtt_finish_pages(struct drm_i915_gem_object *obj,
 			       struct sg_table *pages);
 
 int i915_gem_gtt_reserve(struct i915_address_space *vm,
+			 struct i915_gem_ww_ctx *ww,
 			 struct drm_mm_node *node,
 			 u64 size, u64 offset, unsigned long color,
 			 unsigned int flags);
 
 int i915_gem_gtt_insert(struct i915_address_space *vm,
+			struct i915_gem_ww_ctx *ww,
 			struct drm_mm_node *node,
 			u64 size, u64 alignment, unsigned long color,
 			u64 start, u64 end, unsigned int flags);
@@ -41,6 +44,7 @@ int i915_gem_gtt_insert(struct i915_address_space *vm,
 #define PIN_HIGH		BIT_ULL(5)
 #define PIN_OFFSET_BIAS		BIT_ULL(6)
 #define PIN_OFFSET_FIXED	BIT_ULL(7)
+#define PIN_VALIDATE		BIT_ULL(8) /* validate placement only, no need to call unpin() */
 
 #define PIN_GLOBAL		BIT_ULL(10) /* I915_VMA_GLOBAL_BIND */
 #define PIN_USER		BIT_ULL(11) /* I915_VMA_LOCAL_BIND */
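
A hedged usage sketch of the extended API (not part of this patch): @ww may be NULL, in which case eviction falls back to trylocking each object it has to move.

static int reserve_at_offset_sketch(struct i915_address_space *vm,
				    struct i915_gem_ww_ctx *ww,
				    struct drm_mm_node *node,
				    u64 size, u64 offset,
				    unsigned long color)
{
	/* Sketch only: flags 0 permits eviction; PIN_NOEVICT would forbid it. */
	return i915_gem_gtt_reserve(vm, ww, node, size, offset, color, 0);
}
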
diff --git a/drivers/gpu/drm/i915/i915_getparam.c b/drivers/gpu/drm/i915/i915_getparam.c
index dbe49fd8728320be2da5d05be6835feeeedd74f3..c12a0adefda539498151f3f1573b6508fae9618b 100644
--- a/drivers/gpu/drm/i915/i915_getparam.c
+++ b/drivers/gpu/drm/i915/i915_getparam.c
@@ -33,7 +33,7 @@ int i915_getparam_ioctl(struct drm_device *dev, void *data,
 		value = pdev->revision;
 		break;
 	case I915_PARAM_NUM_FENCES_AVAIL:
-		value = i915->ggtt.num_fences;
+		value = to_gt(i915)->ggtt->num_fences;
 		break;
 	case I915_PARAM_HAS_OVERLAY:
 		value = !!i915->overlay;
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index 102c7bb747f8427cde9087d4cbe399f799479288..1d042551619e202cfc72791a9a27ce53f8498ad0 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -34,6 +34,7 @@
 #include <linux/utsname.h>
 #include <linux/zlib.h>
 
+#include <drm/drm_cache.h>
 #include <drm/drm_print.h>
 
 #include "display/intel_dmc.h"
@@ -46,11 +47,11 @@
 #include "gt/intel_gt_pm.h"
 #include "gt/intel_gt_regs.h"
 
+#include "i915_driver.h"
 #include "i915_drv.h"
 #include "i915_gpu_error.h"
 #include "i915_memcpy.h"
 #include "i915_scatterlist.h"
-#include "i915_vma_snapshot.h"
 
 #define ALLOW_FAIL (__GFP_KSWAPD_RECLAIM | __GFP_RETRY_MAYFAIL | __GFP_NOWARN)
 #define ATOMIC_MAYFAIL (GFP_ATOMIC | __GFP_NOWARN)
@@ -1015,8 +1016,10 @@ void __i915_gpu_coredump_free(struct kref *error_ref)
 
 static struct i915_vma_coredump *
 i915_vma_coredump_create(const struct intel_gt *gt,
-			 const struct i915_vma_snapshot *vsnap,
-			 struct i915_vma_compress *compress)
+			 const struct i915_vma_resource *vma_res,
+			 struct i915_vma_compress *compress,
+			 const char *name)
 {
 	struct i915_ggtt *ggtt = gt->ggtt;
 	const u64 slot = ggtt->error_capture.start;
@@ -1026,7 +1029,7 @@ i915_vma_coredump_create(const struct intel_gt *gt,
 
 	might_sleep();
 
-	if (!vsnap || !vsnap->pages || !compress)
+	if (!vma_res || !vma_res->bi.pages || !compress)
 		return NULL;
 
 	dst = kmalloc(sizeof(*dst), ALLOW_FAIL);
@@ -1039,12 +1042,12 @@ i915_vma_coredump_create(const struct intel_gt *gt,
 	}
 
 	INIT_LIST_HEAD(&dst->page_list);
-	strcpy(dst->name, vsnap->name);
+	strcpy(dst->name, name);
 	dst->next = NULL;
 
-	dst->gtt_offset = vsnap->gtt_offset;
-	dst->gtt_size = vsnap->gtt_size;
-	dst->gtt_page_sizes = vsnap->page_sizes;
+	dst->gtt_offset = vma_res->start;
+	dst->gtt_size = vma_res->node_size;
+	dst->gtt_page_sizes = vma_res->page_sizes_gtt;
 	dst->unused = 0;
 
 	ret = -EINVAL;
@@ -1052,7 +1055,7 @@ i915_vma_coredump_create(const struct intel_gt *gt,
 		void __iomem *s;
 		dma_addr_t dma;
 
-		for_each_sgt_daddr(dma, iter, vsnap->pages) {
+		for_each_sgt_daddr(dma, iter, vma_res->bi.pages) {
 			mutex_lock(&ggtt->error_mutex);
 			ggtt->vm.insert_page(&ggtt->vm, dma, slot,
 					     I915_CACHE_NONE, 0);
@@ -1070,11 +1073,11 @@ i915_vma_coredump_create(const struct intel_gt *gt,
 			if (ret)
 				break;
 		}
-	} else if (vsnap->mr && vsnap->mr->type != INTEL_MEMORY_SYSTEM) {
-		struct intel_memory_region *mem = vsnap->mr;
+	} else if (vma_res->bi.lmem) {
+		struct intel_memory_region *mem = vma_res->mr;
 		dma_addr_t dma;
 
-		for_each_sgt_daddr(dma, iter, vsnap->pages) {
+		for_each_sgt_daddr(dma, iter, vma_res->bi.pages) {
 			void __iomem *s;
 
 			s = io_mapping_map_wc(&mem->iomap,
@@ -1090,7 +1093,7 @@ i915_vma_coredump_create(const struct intel_gt *gt,
 	} else {
 		struct page *page;
 
-		for_each_sgt_page(page, iter, vsnap->pages) {
+		for_each_sgt_page(page, iter, vma_res->bi.pages) {
 			void *s;
 
 			drm_clflush_pages(&page, 1);
@@ -1326,33 +1329,32 @@ static bool record_context(struct i915_gem_context_coredump *e,
 
 struct intel_engine_capture_vma {
 	struct intel_engine_capture_vma *next;
-	struct i915_vma_snapshot *vsnap;
+	struct i915_vma_resource *vma_res;
 	char name[16];
 	bool lockdep_cookie;
 };
 
 static struct intel_engine_capture_vma *
 capture_vma_snapshot(struct intel_engine_capture_vma *next,
-		     struct i915_vma_snapshot *vsnap,
-		     gfp_t gfp)
+		     struct i915_vma_resource *vma_res,
+		     gfp_t gfp, const char *name)
 {
 	struct intel_engine_capture_vma *c;
 
-	if (!i915_vma_snapshot_present(vsnap))
+	if (!vma_res)
 		return next;
 
 	c = kmalloc(sizeof(*c), gfp);
 	if (!c)
 		return next;
 
-	if (!i915_vma_snapshot_resource_pin(vsnap, &c->lockdep_cookie)) {
+	if (!i915_vma_resource_hold(vma_res, &c->lockdep_cookie)) {
 		kfree(c);
 		return next;
 	}
 
-	strcpy(c->name, vsnap->name);
-	c->vsnap = vsnap;
-	i915_vma_snapshot_get(vsnap);
+	strcpy(c->name, name);
+	c->vma_res = i915_vma_resource_get(vma_res);
 
 	c->next = next;
 	return c;
@@ -1364,8 +1366,6 @@ capture_vma(struct intel_engine_capture_vma *next,
 	    const char *name,
 	    gfp_t gfp)
 {
-	struct i915_vma_snapshot *vsnap;
-
 	if (!vma)
 		return next;
 
@@ -1374,19 +1374,10 @@ capture_vma(struct intel_engine_capture_vma *next,
 	 * to a struct i915_vma_snapshot at command submission time.
 	 * Not here.
 	 */
-	GEM_WARN_ON(!i915_vma_is_pinned(vma));
-	if (!i915_vma_is_pinned(vma))
-		return next;
-
-	vsnap = i915_vma_snapshot_alloc(gfp);
-	if (!vsnap)
+	if (GEM_WARN_ON(!i915_vma_is_pinned(vma)))
 		return next;
 
-	i915_vma_snapshot_init(vsnap, vma, name);
-	next = capture_vma_snapshot(next, vsnap, gfp);
-
-	/* FIXME: Replace on async unbind. */
-	i915_vma_snapshot_put(vsnap);
+	next = capture_vma_snapshot(next, vma->resource, gfp, name);
 
 	return next;
 }
@@ -1399,7 +1390,8 @@ capture_user(struct intel_engine_capture_vma *capture,
 	struct i915_capture_list *c;
 
 	for (c = rq->capture_list; c; c = c->next)
-		capture = capture_vma_snapshot(capture, c->vma_snapshot, gfp);
+		capture = capture_vma_snapshot(capture, c->vma_res, gfp,
+					       "user");
 
 	return capture;
 }
@@ -1417,16 +1409,19 @@ static struct i915_vma_coredump *
 create_vma_coredump(const struct intel_gt *gt, struct i915_vma *vma,
 		    const char *name, struct i915_vma_compress *compress)
 {
-	struct i915_vma_coredump *ret;
-	struct i915_vma_snapshot tmp;
+	struct i915_vma_coredump *ret = NULL;
+	struct i915_vma_resource *vma_res;
+	bool lockdep_cookie;
 
 	if (!vma)
 		return NULL;
 
-	GEM_WARN_ON(!i915_vma_is_pinned(vma));
-	i915_vma_snapshot_init_onstack(&tmp, vma, name);
-	ret = i915_vma_coredump_create(gt, &tmp, compress);
-	i915_vma_snapshot_put_onstack(&tmp);
+	vma_res = vma->resource;
+
+	if (i915_vma_resource_hold(vma_res, &lockdep_cookie)) {
+		ret = i915_vma_coredump_create(gt, vma_res, compress, name);
+		i915_vma_resource_unhold(vma_res, lockdep_cookie);
+	}
 
 	return ret;
 }
@@ -1473,7 +1468,7 @@ intel_engine_coredump_add_request(struct intel_engine_coredump *ee,
 	 * as the simplest method to avoid being overwritten
 	 * by userspace.
 	 */
-	vma = capture_vma_snapshot(vma, &rq->batch_snapshot, gfp);
+	vma = capture_vma_snapshot(vma, rq->batch_res, gfp, "batch");
 	vma = capture_user(vma, rq, gfp);
 	vma = capture_vma(vma, rq->ring->vma, "ring", gfp);
 	vma = capture_vma(vma, rq->context->state, "HW context", gfp);
@@ -1494,14 +1489,14 @@ intel_engine_coredump_add_vma(struct intel_engine_coredump *ee,
 
 	while (capture) {
 		struct intel_engine_capture_vma *this = capture;
-		struct i915_vma_snapshot *vsnap = this->vsnap;
+		struct i915_vma_resource *vma_res = this->vma_res;
 
 		add_vma(ee,
-			i915_vma_coredump_create(engine->gt,
-						 vsnap, compress));
+			i915_vma_coredump_create(engine->gt, vma_res,
+						 compress, this->name));
 
-		i915_vma_snapshot_resource_unpin(vsnap, this->lockdep_cookie);
-		i915_vma_snapshot_put(vsnap);
+		i915_vma_resource_unhold(vma_res, this->lockdep_cookie);
+		i915_vma_resource_put(vma_res);
 
 		capture = this->next;
 		kfree(this);
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h
index 5aedf51298148899ac835ff0becd9236b8befecb..903d838e2e63e136849fa2fa7149e9c879c369fa 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.h
+++ b/drivers/gpu/drm/i915/i915_gpu_error.h
@@ -210,6 +210,17 @@ struct drm_i915_error_state_buf {
 	int err;
 };
 
+static inline u32 i915_reset_count(struct i915_gpu_error *error)
+{
+	return atomic_read(&error->reset_count);
+}
+
+static inline u32 i915_reset_engine_count(struct i915_gpu_error *error,
+					  const struct intel_engine_cs *engine)
+{
+	return atomic_read(&error->reset_engine_count[engine->uabi_class]);
+}
+
 #if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR)
 
 __printf(2, 3)
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index c05eb09d8a66ecad277a3ff9cf319723e6f2b710..73cebc6aa65072040408a7291b335451e1956873 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -34,6 +34,7 @@
 
 #include <drm/drm_drv.h>
 
+#include "display/icl_dsi_regs.h"
 #include "display/intel_de.h"
 #include "display/intel_display_trace.h"
 #include "display/intel_display_types.h"
@@ -49,6 +50,7 @@
 #include "gt/intel_gt_regs.h"
 #include "gt/intel_rps.h"
 
+#include "i915_driver.h"
 #include "i915_drv.h"
 #include "i915_irq.h"
 #include "intel_pm.h"
@@ -178,6 +180,7 @@ static const u32 hpd_sde_dg1[HPD_NUM_PINS] = {
 	[HPD_PORT_B] = SDE_DDI_HOTPLUG_ICP(HPD_PORT_B),
 	[HPD_PORT_C] = SDE_DDI_HOTPLUG_ICP(HPD_PORT_C),
 	[HPD_PORT_D] = SDE_DDI_HOTPLUG_ICP(HPD_PORT_D),
+	[HPD_PORT_TC1] = SDE_TC_HOTPLUG_DG2(HPD_PORT_TC1),
 };
 
 static void intel_hpd_init_pins(struct drm_i915_private *dev_priv)
@@ -4347,6 +4350,10 @@ static irqreturn_t i965_irq_handler(int irq, void *arg)
 	return ret;
 }
 
+struct intel_hotplug_funcs {
+	void (*hpd_irq_setup)(struct drm_i915_private *i915);
+};
+
 #define HPD_FUNCS(platform)					 \
 static const struct intel_hotplug_funcs platform##_hpd_funcs = { \
 	.hpd_irq_setup = platform##_hpd_irq_setup,		 \
@@ -4361,6 +4368,12 @@ HPD_FUNCS(spt);
 HPD_FUNCS(ilk);
 #undef HPD_FUNCS
 
+void intel_hpd_irq_setup(struct drm_i915_private *i915)
+{
+	if (i915->display_irqs_enabled && i915->hotplug_funcs)
+		i915->hotplug_funcs->hpd_irq_setup(i915);
+}
+
 /**
  * intel_irq_init - initializes irq support
  * @dev_priv: i915 device instance
@@ -4413,7 +4426,9 @@ void intel_irq_init(struct drm_i915_private *dev_priv)
 		if (I915_HAS_HOTPLUG(dev_priv))
 			dev_priv->hotplug_funcs = &i915_hpd_funcs;
 	} else {
-		if (HAS_PCH_DG1(dev_priv))
+		if (HAS_PCH_DG2(dev_priv))
+			dev_priv->hotplug_funcs = &icp_hpd_funcs;
+		else if (HAS_PCH_DG1(dev_priv))
 			dev_priv->hotplug_funcs = &dg1_hpd_funcs;
 		else if (DISPLAY_VER(dev_priv) >= 11)
 			dev_priv->hotplug_funcs = &gen11_hpd_funcs;
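
For reference, an illustrative expansion of the HPD_FUNCS() stamp above for one platform (sketch only, renamed to avoid clashing with the real table):

static const struct intel_hotplug_funcs i915_hpd_funcs_expanded = {
	.hpd_irq_setup = i915_hpd_irq_setup,	/* the per-platform hook */
};

Display code can now call intel_hpd_irq_setup(i915) and leave the display_irqs_enabled and NULL-funcs checks to that one wrapper.
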
diff --git a/drivers/gpu/drm/i915/i915_irq.h b/drivers/gpu/drm/i915/i915_irq.h
index 0eb90d271fa70ee0b44fcf4ad14c6109576d8f0e..82639d9d7e820741459b1009e9fdce96492fcfb7 100644
--- a/drivers/gpu/drm/i915/i915_irq.h
+++ b/drivers/gpu/drm/i915/i915_irq.h
@@ -37,6 +37,7 @@ i915_disable_pipestat(struct drm_i915_private *dev_priv, enum pipe pipe,
 void valleyview_enable_display_irqs(struct drm_i915_private *dev_priv);
 void valleyview_disable_display_irqs(struct drm_i915_private *dev_priv);
 
+void intel_hpd_irq_setup(struct drm_i915_private *i915);
 void i915_hotplug_interrupt_update(struct drm_i915_private *dev_priv,
 				   u32 mask,
 				   u32 bits);
diff --git a/drivers/gpu/drm/i915/i915_mitigations.c b/drivers/gpu/drm/i915/i915_mitigations.c
index 84f12598d145845bb9f7ef1f3a50d566ca6980d8..def7302ef7fe27b53ff855f39b4e4a630cadefc3 100644
--- a/drivers/gpu/drm/i915/i915_mitigations.c
+++ b/drivers/gpu/drm/i915/i915_mitigations.c
@@ -8,6 +8,7 @@
 #include <linux/slab.h>
 #include <linux/string.h>
 
+#include "i915_driver.h"
 #include "i915_drv.h"
 #include "i915_mitigations.h"
 
diff --git a/drivers/gpu/drm/i915/i915_module.c b/drivers/gpu/drm/i915/i915_module.c
index 8451822637f04961ac4414ef7919304b5c7826f5..65acd7bf75d0834dc20131d07b7d13ed2d83d7ad 100644
--- a/drivers/gpu/drm/i915/i915_module.c
+++ b/drivers/gpu/drm/i915/i915_module.c
@@ -9,6 +9,7 @@
 #include "gem/i915_gem_context.h"
 #include "gem/i915_gem_object.h"
 #include "i915_active.h"
+#include "i915_driver.h"
 #include "i915_params.h"
 #include "i915_pci.h"
 #include "i915_perf.h"
@@ -16,6 +17,7 @@
 #include "i915_scheduler.h"
 #include "i915_selftest.h"
 #include "i915_vma.h"
+#include "i915_vma_resource.h"
 
 static int i915_check_nomodeset(void)
 {
@@ -61,6 +63,8 @@ static const struct {
 	  .exit = i915_scheduler_module_exit },
 	{ .init = i915_vma_module_init,
 	  .exit = i915_vma_module_exit },
+	{ .init = i915_vma_resource_module_init,
+	  .exit = i915_vma_resource_module_exit },
 	{ .init = i915_mock_selftests },
 	{ .init = i915_pmu_init,
 	  .exit = i915_pmu_exit },
diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
index 189d3bb8955a0ca3c6011d788371e934b5125092..8246cbe9b01db92ada9cae93ca3128270d32f018 100644
--- a/drivers/gpu/drm/i915/i915_pci.c
+++ b/drivers/gpu/drm/i915/i915_pci.c
@@ -22,6 +22,7 @@
  *
  */
 
+#include <drm/drm_color_mgmt.h>
 #include <drm/drm_drv.h>
 #include <drm/i915_pciids.h>
 
@@ -1046,6 +1047,7 @@ static const struct intel_device_info dg2_info = {
 	.graphics.rel = 55,
 	.media.rel = 55,
 	PLATFORM(INTEL_DG2),
+	.has_guc_deprivilege = 1,
 	.has_64k_pages = 1,
 	.platform_engine_mask =
 		BIT(RCS0) | BIT(BCS0) |
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 590efdb9de8db83e391aeb7bc408e624daffb2cb..00fb40029f4346422308f0bf753f907463bb6cbd 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -196,6 +196,7 @@
 #include <linux/uuid.h>
 
 #include "gem/i915_gem_context.h"
+#include "gem/i915_gem_internal.h"
 #include "gt/intel_engine_pm.h"
 #include "gt/intel_engine_regs.h"
 #include "gt/intel_engine_user.h"
@@ -205,9 +206,11 @@
 #include "gt/intel_gt_clock_utils.h"
 #include "gt/intel_gt_regs.h"
 #include "gt/intel_lrc.h"
+#include "gt/intel_lrc_reg.h"
 #include "gt/intel_ring.h"
 
 #include "i915_drv.h"
+#include "i915_file_private.h"
 #include "i915_perf.h"
 #include "i915_perf_oa_regs.h"
 
@@ -1633,8 +1636,8 @@ static int alloc_noa_wait(struct i915_perf_stream *stream)
 	struct drm_i915_gem_object *bo;
 	struct i915_vma *vma;
 	const u64 delay_ticks = 0xffffffffffffffff -
-		intel_gt_ns_to_clock_interval(stream->perf->i915->ggtt.vm.gt,
-					      atomic64_read(&stream->perf->noa_programming_delay));
+		intel_gt_ns_to_clock_interval(to_gt(stream->perf->i915),
+					      atomic64_read(&stream->perf->noa_programming_delay));
 	const u32 base = stream->engine->mmio_base;
 #define CS_GPR(x) GEN8_RING_CS_GPR(base, x)
 	u32 *batch, *ts0, *cs, *jump;
@@ -2117,7 +2120,7 @@ gen8_update_reg_state_unlocked(const struct intel_context *ce,
 	u32 ctx_oactxctrl = stream->perf->ctx_oactxctrl_offset;
 	u32 ctx_flexeu0 = stream->perf->ctx_flexeu0_offset;
 	/* The MMIO offsets for Flex EU registers aren't contiguous */
-	i915_reg_t flex_regs[] = {
+	static const i915_reg_t flex_regs[] = {
 		EU_PERF_CNTL0,
 		EU_PERF_CNTL1,
 		EU_PERF_CNTL2,
@@ -3545,7 +3548,7 @@ i915_perf_open_ioctl_locked(struct i915_perf *perf,
 
 static u64 oa_exponent_to_ns(struct i915_perf *perf, int exponent)
 {
-	return intel_gt_clock_interval_to_ns(perf->i915->ggtt.vm.gt,
+	return intel_gt_clock_interval_to_ns(to_gt(perf->i915),
 					     2ULL << exponent);
 }
 
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 87c92314ee269bc003af7f7e313b221ea7134c7a..2b8a3086ed35aad70999a213e48eceb23228a503 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -115,9 +115,6 @@
  *  #define GEN8_BAR                    _MMIO(0xb888)
  */
 
-#define VLV_MIPI_BASE			VLV_DISPLAY_BASE
-#define BXT_MIPI_BASE			0x60000
-
 #define DISPLAY_MMIO_BASE(dev_priv)	(INTEL_INFO(dev_priv)->display_mmio_offset)
 
 /*
@@ -1830,117 +1827,8 @@
 					    _PALETTE_B, _CHV_PALETTE_C) + \
 				      (i) * 4)
 
-/* MCH MMIO space */
-
-/*
- * MCHBAR mirror.
- *
- * This mirrors the MCHBAR MMIO space whose location is determined by
- * device 0 function 0's pci config register 0x44 or 0x48 and matches it in
- * every way.  It is not accessible from the CP register read instructions.
- *
- * Starting from Haswell, you can't write registers using the MCHBAR mirror,
- * just read.
- */
-#define MCHBAR_MIRROR_BASE	0x10000
-
-#define MCHBAR_MIRROR_BASE_SNB	0x140000
-
-#define CTG_STOLEN_RESERVED		_MMIO(MCHBAR_MIRROR_BASE + 0x34)
-#define ELK_STOLEN_RESERVED		_MMIO(MCHBAR_MIRROR_BASE + 0x48)
-#define G4X_STOLEN_RESERVED_ADDR1_MASK	(0xFFFF << 16)
-#define G4X_STOLEN_RESERVED_ADDR2_MASK	(0xFFF << 4)
-#define G4X_STOLEN_RESERVED_ENABLE	(1 << 0)
-
-/* Memory controller frequency in MCHBAR for Haswell (possible SNB+) */
-#define DCLK _MMIO(MCHBAR_MIRROR_BASE_SNB + 0x5e04)
-
-/* 915-945 and GM965 MCH register controlling DRAM channel access */
-#define DCC			_MMIO(MCHBAR_MIRROR_BASE + 0x200)
-#define DCC_ADDRESSING_MODE_SINGLE_CHANNEL		(0 << 0)
-#define DCC_ADDRESSING_MODE_DUAL_CHANNEL_ASYMMETRIC	(1 << 0)
-#define DCC_ADDRESSING_MODE_DUAL_CHANNEL_INTERLEAVED	(2 << 0)
-#define DCC_ADDRESSING_MODE_MASK			(3 << 0)
-#define DCC_CHANNEL_XOR_DISABLE				(1 << 10)
-#define DCC_CHANNEL_XOR_BIT_17				(1 << 9)
-#define DCC2			_MMIO(MCHBAR_MIRROR_BASE + 0x204)
-#define DCC2_MODIFIED_ENHANCED_DISABLE			(1 << 20)
-
-/* Pineview MCH register contains DDR3 setting */
-#define CSHRDDR3CTL            _MMIO(MCHBAR_MIRROR_BASE + 0x1a8)
-#define CSHRDDR3CTL_DDR3       (1 << 2)
-
-/* 965 MCH register controlling DRAM channel configuration */
-#define C0DRB3_BW		_MMIO(MCHBAR_MIRROR_BASE + 0x206)
-#define C1DRB3_BW		_MMIO(MCHBAR_MIRROR_BASE + 0x606)
-
-/* snb MCH registers for reading the DRAM channel configuration */
-#define MAD_DIMM_C0			_MMIO(MCHBAR_MIRROR_BASE_SNB + 0x5004)
-#define MAD_DIMM_C1			_MMIO(MCHBAR_MIRROR_BASE_SNB + 0x5008)
-#define MAD_DIMM_C2			_MMIO(MCHBAR_MIRROR_BASE_SNB + 0x500C)
-#define   MAD_DIMM_ECC_MASK		(0x3 << 24)
-#define   MAD_DIMM_ECC_OFF		(0x0 << 24)
-#define   MAD_DIMM_ECC_IO_ON_LOGIC_OFF	(0x1 << 24)
-#define   MAD_DIMM_ECC_IO_OFF_LOGIC_ON	(0x2 << 24)
-#define   MAD_DIMM_ECC_ON		(0x3 << 24)
-#define   MAD_DIMM_ENH_INTERLEAVE	(0x1 << 22)
-#define   MAD_DIMM_RANK_INTERLEAVE	(0x1 << 21)
-#define   MAD_DIMM_B_WIDTH_X16		(0x1 << 20) /* X8 chips if unset */
-#define   MAD_DIMM_A_WIDTH_X16		(0x1 << 19) /* X8 chips if unset */
-#define   MAD_DIMM_B_DUAL_RANK		(0x1 << 18)
-#define   MAD_DIMM_A_DUAL_RANK		(0x1 << 17)
-#define   MAD_DIMM_A_SELECT		(0x1 << 16)
-/* DIMM sizes are in multiples of 256mb. */
-#define   MAD_DIMM_B_SIZE_SHIFT		8
-#define   MAD_DIMM_B_SIZE_MASK		(0xff << MAD_DIMM_B_SIZE_SHIFT)
-#define   MAD_DIMM_A_SIZE_SHIFT		0
-#define   MAD_DIMM_A_SIZE_MASK		(0xff << MAD_DIMM_A_SIZE_SHIFT)
-
-/* snb MCH registers for priority tuning */
-#define MCH_SSKPD			_MMIO(MCHBAR_MIRROR_BASE_SNB + 0x5d10)
-#define   MCH_SSKPD_WM0_MASK		0x3f
-#define   MCH_SSKPD_WM0_VAL		0xc
-
-/* Clocking configuration register */
-#define CLKCFG			_MMIO(MCHBAR_MIRROR_BASE + 0xc00)
-#define CLKCFG_FSB_400					(0 << 0)	/* hrawclk 100 */
-#define CLKCFG_FSB_400_ALT				(5 << 0)	/* hrawclk 100 */
-#define CLKCFG_FSB_533					(1 << 0)	/* hrawclk 133 */
-#define CLKCFG_FSB_667					(3 << 0)	/* hrawclk 166 */
-#define CLKCFG_FSB_800					(2 << 0)	/* hrawclk 200 */
-#define CLKCFG_FSB_1067					(6 << 0)	/* hrawclk 266 */
-#define CLKCFG_FSB_1067_ALT				(0 << 0)	/* hrawclk 266 */
-#define CLKCFG_FSB_1333					(7 << 0)	/* hrawclk 333 */
-#define CLKCFG_FSB_1333_ALT				(4 << 0)	/* hrawclk 333 */
-#define CLKCFG_FSB_1600_ALT				(6 << 0)	/* hrawclk 400 */
-#define CLKCFG_FSB_MASK					(7 << 0)
-#define CLKCFG_MEM_533					(1 << 4)
-#define CLKCFG_MEM_667					(2 << 4)
-#define CLKCFG_MEM_800					(3 << 4)
-#define CLKCFG_MEM_MASK					(7 << 4)
-
-#define HPLLVCO                 _MMIO(MCHBAR_MIRROR_BASE + 0xc38)
-#define HPLLVCO_MOBILE          _MMIO(MCHBAR_MIRROR_BASE + 0xc0f)
-
-#define TSC1			_MMIO(0x11001)
-#define   TSE			(1 << 0)
-#define TR1			_MMIO(0x11006)
-#define TSFS			_MMIO(0x11020)
-#define   TSFS_SLOPE_MASK	0x0000ff00
-#define   TSFS_SLOPE_SHIFT	8
-#define   TSFS_INTR_MASK	0x000000ff
-
-#define CSIPLL0			_MMIO(0x12c10)
-#define DDRMPLL1		_MMIO(0X12c20)
 #define PEG_BAND_GAP_DATA	_MMIO(0x14d68)
 
-#define GEN6_GT_PERF_STATUS	_MMIO(MCHBAR_MIRROR_BASE_SNB + 0x5948)
-#define BXT_GT_PERF_STATUS      _MMIO(MCHBAR_MIRROR_BASE_SNB + 0x7070)
-#define GEN6_RP_STATE_LIMITS	_MMIO(MCHBAR_MIRROR_BASE_SNB + 0x5994)
-#define GEN6_RP_STATE_CAP	_MMIO(MCHBAR_MIRROR_BASE_SNB + 0x5998)
-#define   RP0_CAP_MASK		REG_GENMASK(7, 0)
-#define   RP1_CAP_MASK		REG_GENMASK(15, 8)
-#define   RPN_CAP_MASK		REG_GENMASK(23, 16)
 #define BXT_RP_STATE_CAP        _MMIO(0x138170)
 #define GEN9_RP_STATE_LIMITS	_MMIO(0x138148)
 #define XEHPSDV_RP_STATE_CAP	_MMIO(0x250014)
@@ -4291,49 +4179,32 @@
 #define _WM0_PIPEC_IVB		0x45200
 #define WM0_PIPE_ILK(pipe)	_MMIO_PIPE3((pipe), _WM0_PIPEA_ILK, \
 					    _WM0_PIPEB_ILK, _WM0_PIPEC_IVB)
-#define  WM0_PIPE_PLANE_MASK	(0xffff << 16)
-#define  WM0_PIPE_PLANE_SHIFT	16
-#define  WM0_PIPE_SPRITE_MASK	(0xff << 8)
-#define  WM0_PIPE_SPRITE_SHIFT	8
-#define  WM0_PIPE_CURSOR_MASK	(0xff)
+#define  WM0_PIPE_PRIMARY_MASK	REG_GENMASK(31, 16)
+#define  WM0_PIPE_SPRITE_MASK	REG_GENMASK(15, 8)
+#define  WM0_PIPE_CURSOR_MASK	REG_GENMASK(7, 0)
+#define  WM0_PIPE_PRIMARY(x)	REG_FIELD_PREP(WM0_PIPE_PRIMARY_MASK, (x))
+#define  WM0_PIPE_SPRITE(x)	REG_FIELD_PREP(WM0_PIPE_SPRITE_MASK, (x))
+#define  WM0_PIPE_CURSOR(x)	REG_FIELD_PREP(WM0_PIPE_CURSOR_MASK, (x))
 #define WM1_LP_ILK		_MMIO(0x45108)
-#define  WM1_LP_SR_EN		(1 << 31)
-#define  WM1_LP_LATENCY_SHIFT	24
-#define  WM1_LP_LATENCY_MASK	(0x7f << 24)
-#define  WM1_LP_FBC_MASK	(0xf << 20)
-#define  WM1_LP_FBC_SHIFT	20
-#define  WM1_LP_FBC_SHIFT_BDW	19
-#define  WM1_LP_SR_MASK		(0x7ff << 8)
-#define  WM1_LP_SR_SHIFT	8
-#define  WM1_LP_CURSOR_MASK	(0xff)
 #define WM2_LP_ILK		_MMIO(0x4510c)
-#define  WM2_LP_EN		(1 << 31)
 #define WM3_LP_ILK		_MMIO(0x45110)
-#define  WM3_LP_EN		(1 << 31)
+#define  WM_LP_ENABLE		REG_BIT(31)
+#define  WM_LP_LATENCY_MASK	REG_GENMASK(30, 24)
+#define  WM_LP_FBC_MASK_BDW	REG_GENMASK(23, 19)
+#define  WM_LP_FBC_MASK_ILK	REG_GENMASK(23, 20)
+#define  WM_LP_PRIMARY_MASK	REG_GENMASK(18, 8)
+#define  WM_LP_CURSOR_MASK	REG_GENMASK(7, 0)
+#define  WM_LP_LATENCY(x)	REG_FIELD_PREP(WM_LP_LATENCY_MASK, (x))
+#define  WM_LP_FBC_BDW(x)	REG_FIELD_PREP(WM_LP_FBC_MASK_BDW, (x))
+#define  WM_LP_FBC_ILK(x)	REG_FIELD_PREP(WM_LP_FBC_MASK_ILK, (x))
+#define  WM_LP_PRIMARY(x)	REG_FIELD_PREP(WM_LP_PRIMARY_MASK, (x))
+#define  WM_LP_CURSOR(x)	REG_FIELD_PREP(WM_LP_CURSOR_MASK, (x))
 #define WM1S_LP_ILK		_MMIO(0x45120)
 #define WM2S_LP_IVB		_MMIO(0x45124)
 #define WM3S_LP_IVB		_MMIO(0x45128)
-#define  WM1S_LP_EN		(1 << 31)
-
-#define HSW_WM_LP_VAL(lat, fbc, pri, cur) \
-	(WM3_LP_EN | ((lat) << WM1_LP_LATENCY_SHIFT) | \
-	 ((fbc) << WM1_LP_FBC_SHIFT) | ((pri) << WM1_LP_SR_SHIFT) | (cur))
-
-/* Memory latency timer register */
-#define MLTR_ILK		_MMIO(0x11222)
-#define  MLTR_WM1_SHIFT		0
-#define  MLTR_WM2_SHIFT		8
-/* the unit of memory self-refresh latency time is 0.5us */
-#define  ILK_SRLT_MASK		0x3f
-
-
-/* the address where we get all kinds of latency value */
-#define SSKPD			_MMIO(0x5d10)
-#define SSKPD_WM_MASK		0x3f
-#define SSKPD_WM0_SHIFT		0
-#define SSKPD_WM1_SHIFT		8
-#define SSKPD_WM2_SHIFT		16
-#define SSKPD_WM3_SHIFT		24
+#define  WM_LP_SPRITE_ENABLE	REG_BIT(31) /* ilk/snb WM1S only */
+#define  WM_LP_SPRITE_MASK	REG_GENMASK(10, 0)
+#define  WM_LP_SPRITE(x)	REG_FIELD_PREP(WM_LP_SPRITE_MASK, (x))
 
 /*
  * The two pipe frame counter registers are not synchronized, so
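
As a hedged sketch (not part of this patch), the removed HSW_WM_LP_VAL() helper maps onto the new REG_FIELD_PREP() style like so:

static u32 hsw_wm_lp_val_sketch(u32 latency, u32 fbc, u32 pri, u32 cur)
{
	/* Sketch only: HSW uses the ILK FBC field; BDW+ would use WM_LP_FBC_BDW(). */
	return WM_LP_ENABLE |
	       WM_LP_LATENCY(latency) |
	       WM_LP_FBC_ILK(fbc) |
	       WM_LP_PRIMARY(pri) |
	       WM_LP_CURSOR(cur);
}

Since the masks now carry the bit ranges, an out-of-range field value can trip REG_FIELD_PREP()'s checks instead of silently spilling into neighbouring bits.
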
@@ -5632,7 +5503,8 @@
 #define TGL_DMC_DEBUG_DC6_COUNT	_MMIO(0x101088)
 #define DG1_DMC_DEBUG_DC5_COUNT	_MMIO(0x134154)
 
-#define DMC_DEBUG3		_MMIO(0x101090)
+#define TGL_DMC_DEBUG3		_MMIO(0x101090)
+#define DG1_DMC_DEBUG3		_MMIO(0x13415c)
 
 /* Display Internal Timeout Register */
 #define RM_TIMEOUT		_MMIO(0x42060)
@@ -6040,11 +5912,14 @@
 #define HSW_NDE_RSTWRN_OPT	_MMIO(0x46408)
 #define  RESET_PCH_HANDSHAKE_ENABLE	(1 << 4)
 
-#define GEN8_CHICKEN_DCPR_1		_MMIO(0x46430)
-#define   SKL_SELECT_ALTERNATE_DC_EXIT	REG_BIT(30)
-#define   ICL_DELAY_PMRSP		REG_BIT(22)
-#define   DISABLE_FLR_SRC		REG_BIT(15)
-#define   MASK_WAKEMEM			REG_BIT(13)
+#define GEN8_CHICKEN_DCPR_1			_MMIO(0x46430)
+#define   SKL_SELECT_ALTERNATE_DC_EXIT		REG_BIT(30)
+#define   LATENCY_REPORTING_REMOVED_PIPE_C	REG_BIT(25)
+#define   LATENCY_REPORTING_REMOVED_PIPE_B	REG_BIT(24)
+#define   LATENCY_REPORTING_REMOVED_PIPE_A	REG_BIT(23)
+#define   ICL_DELAY_PMRSP			REG_BIT(22)
+#define   DISABLE_FLR_SRC			REG_BIT(15)
+#define   MASK_WAKEMEM				REG_BIT(13)
 
 #define GEN11_CHICKEN_DCPR_2			_MMIO(0x46434)
 #define   DCPR_MASK_MAXLATENCY_MEMUP_CLR	REG_BIT(27)
@@ -6181,6 +6056,7 @@
 /* south display engine interrupt: ICP/TGP */
 #define SDE_GMBUS_ICP			(1 << 23)
 #define SDE_TC_HOTPLUG_ICP(hpd_pin)	REG_BIT(24 + _HPD_PIN_TC(hpd_pin))
+#define SDE_TC_HOTPLUG_DG2(hpd_pin)	REG_BIT(25 + _HPD_PIN_TC(hpd_pin)) /* sigh */
 #define SDE_DDI_HOTPLUG_ICP(hpd_pin)	REG_BIT(16 + _HPD_PIN_DDI(hpd_pin))
 #define SDE_DDI_HOTPLUG_MASK_ICP	(SDE_DDI_HOTPLUG_ICP(HPD_PORT_D) | \
 					 SDE_DDI_HOTPLUG_ICP(HPD_PORT_C) | \
@@ -7986,6 +7862,12 @@ enum skl_power_gate {
 #define RKL_DPLL_CFGCR0(pll)		_MMIO_PLL(pll, _TGL_DPLL0_CFGCR0, \
 						  _TGL_DPLL1_CFGCR0)
 
+#define _TGL_DPLL0_DIV0					0x164B00
+#define _TGL_DPLL1_DIV0					0x164C00
+#define TGL_DPLL0_DIV0(pll)				_MMIO_PLL(pll, _TGL_DPLL0_DIV0, _TGL_DPLL1_DIV0)
+#define   TGL_DPLL0_DIV0_AFC_STARTUP_MASK		REG_GENMASK(27, 25)
+#define   TGL_DPLL0_DIV0_AFC_STARTUP(val)		REG_FIELD_PREP(TGL_DPLL0_DIV0_AFC_STARTUP_MASK, (val))
+
 #define _TGL_DPLL0_CFGCR1		0x164288
 #define _TGL_DPLL1_CFGCR1		0x164290
 #define _TGL_TBTPLL_CFGCR1		0x1642A0
@@ -8032,7 +7914,15 @@ enum skl_power_gate {
 #define _DKL_PHY6_BASE			0x16D000
 
 /* DEKEL PHY MMIO Address = Phy base + (internal address & ~index_mask) */
+#define _DKL_PCS_DW5			0x14
+#define DKL_PCS_DW5(tc_port)		_MMIO(_PORT(tc_port, _DKL_PHY1_BASE, \
+						    _DKL_PHY2_BASE) + \
+						    _DKL_PCS_DW5)
+#define   DKL_PCS_DW5_CORE_SOFTRESET	REG_BIT(11)
+
 #define _DKL_PLL_DIV0			0x200
+#define   DKL_PLL_DIV0_AFC_STARTUP_MASK	REG_GENMASK(27, 25)
+#define   DKL_PLL_DIV0_AFC_STARTUP(val)	REG_FIELD_PREP(DKL_PLL_DIV0_AFC_STARTUP_MASK, (val))
 #define   DKL_PLL_DIV0_INTEG_COEFF(x)	((x) << 16)
 #define   DKL_PLL_DIV0_INTEG_COEFF_MASK	(0x1F << 16)
 #define   DKL_PLL_DIV0_PROP_COEFF(x)	((x) << 12)
@@ -8042,6 +7932,10 @@ enum skl_power_gate {
 #define   DKL_PLL_DIV0_FBPREDIV_MASK	(0xF << DKL_PLL_DIV0_FBPREDIV_SHIFT)
 #define   DKL_PLL_DIV0_FBDIV_INT(x)	((x) << 0)
 #define   DKL_PLL_DIV0_FBDIV_INT_MASK	(0xFF << 0)
+#define   DKL_PLL_DIV0_MASK		(DKL_PLL_DIV0_INTEG_COEFF_MASK | \
+					 DKL_PLL_DIV0_PROP_COEFF_MASK | \
+					 DKL_PLL_DIV0_FBPREDIV_MASK | \
+					 DKL_PLL_DIV0_FBDIV_INT_MASK)
 #define DKL_PLL_DIV0(tc_port)		_MMIO(_PORT(tc_port, _DKL_PHY1_BASE, \
 						    _DKL_PHY2_BASE) + \
 						    _DKL_PLL_DIV0)
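
A hedged helper for the Dekel addressing rule quoted above (sketch only; the bank-select write to the PHY's index register is omitted):

static u32 dkl_phy_addr_sketch(u32 phy_base, u32 internal_addr, u32 index_mask)
{
	/* DEKEL PHY MMIO address = PHY base + (internal address & ~index_mask) */
	return phy_base + (internal_addr & ~index_mask);
}

This is why _DKL_PCS_DW5 (0x14) and _DKL_PLL_DIV0 (0x200) are plain internal offsets added to _DKL_PHY*_BASE in the _MMIO() macros above.
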
@@ -8215,93 +8109,7 @@ enum skl_power_gate {
 #define  DC_STATE_DEBUG_MASK_CORES	(1 << 0)
 #define  DC_STATE_DEBUG_MASK_MEMORY_UP	(1 << 1)
 
-#define BXT_D_CR_DRP0_DUNIT8			0x1000
-#define BXT_D_CR_DRP0_DUNIT9			0x1200
-#define  BXT_D_CR_DRP0_DUNIT_START		8
-#define  BXT_D_CR_DRP0_DUNIT_END		11
-#define BXT_D_CR_DRP0_DUNIT(x)	_MMIO(MCHBAR_MIRROR_BASE_SNB + \
-				      _PICK_EVEN((x) - 8, BXT_D_CR_DRP0_DUNIT8,\
-						 BXT_D_CR_DRP0_DUNIT9))
-#define  BXT_DRAM_RANK_MASK			0x3
-#define  BXT_DRAM_RANK_SINGLE			0x1
-#define  BXT_DRAM_RANK_DUAL			0x3
-#define  BXT_DRAM_WIDTH_MASK			(0x3 << 4)
-#define  BXT_DRAM_WIDTH_SHIFT			4
-#define  BXT_DRAM_WIDTH_X8			(0x0 << 4)
-#define  BXT_DRAM_WIDTH_X16			(0x1 << 4)
-#define  BXT_DRAM_WIDTH_X32			(0x2 << 4)
-#define  BXT_DRAM_WIDTH_X64			(0x3 << 4)
-#define  BXT_DRAM_SIZE_MASK			(0x7 << 6)
-#define  BXT_DRAM_SIZE_SHIFT			6
-#define  BXT_DRAM_SIZE_4GBIT			(0x0 << 6)
-#define  BXT_DRAM_SIZE_6GBIT			(0x1 << 6)
-#define  BXT_DRAM_SIZE_8GBIT			(0x2 << 6)
-#define  BXT_DRAM_SIZE_12GBIT			(0x3 << 6)
-#define  BXT_DRAM_SIZE_16GBIT			(0x4 << 6)
-#define  BXT_DRAM_TYPE_MASK			(0x7 << 22)
-#define  BXT_DRAM_TYPE_SHIFT			22
-#define  BXT_DRAM_TYPE_DDR3			(0x0 << 22)
-#define  BXT_DRAM_TYPE_LPDDR3			(0x1 << 22)
-#define  BXT_DRAM_TYPE_LPDDR4			(0x2 << 22)
-#define  BXT_DRAM_TYPE_DDR4			(0x4 << 22)
-
-#define SKL_MC_BIOS_DATA_0_0_0_MCHBAR_PCU	_MMIO(MCHBAR_MIRROR_BASE_SNB + 0x5E04)
-#define  DG1_GEAR_TYPE				REG_BIT(16)
-
-#define SKL_MAD_INTER_CHANNEL_0_0_0_MCHBAR_MCMAIN _MMIO(MCHBAR_MIRROR_BASE_SNB + 0x5000)
-#define  SKL_DRAM_DDR_TYPE_MASK			(0x3 << 0)
-#define  SKL_DRAM_DDR_TYPE_DDR4			(0 << 0)
-#define  SKL_DRAM_DDR_TYPE_DDR3			(1 << 0)
-#define  SKL_DRAM_DDR_TYPE_LPDDR3		(2 << 0)
-#define  SKL_DRAM_DDR_TYPE_LPDDR4		(3 << 0)
-
-#define SKL_MAD_DIMM_CH0_0_0_0_MCHBAR_MCMAIN	_MMIO(MCHBAR_MIRROR_BASE_SNB + 0x500C)
-#define SKL_MAD_DIMM_CH1_0_0_0_MCHBAR_MCMAIN	_MMIO(MCHBAR_MIRROR_BASE_SNB + 0x5010)
-#define  SKL_DRAM_S_SHIFT			16
-#define  SKL_DRAM_SIZE_MASK			0x3F
-#define  SKL_DRAM_WIDTH_MASK			(0x3 << 8)
-#define  SKL_DRAM_WIDTH_SHIFT			8
-#define  SKL_DRAM_WIDTH_X8			(0x0 << 8)
-#define  SKL_DRAM_WIDTH_X16			(0x1 << 8)
-#define  SKL_DRAM_WIDTH_X32			(0x2 << 8)
-#define  SKL_DRAM_RANK_MASK			(0x1 << 10)
-#define  SKL_DRAM_RANK_SHIFT			10
-#define  SKL_DRAM_RANK_1			(0x0 << 10)
-#define  SKL_DRAM_RANK_2			(0x1 << 10)
-#define  SKL_DRAM_RANK_MASK			(0x1 << 10)
-#define  ICL_DRAM_SIZE_MASK			0x7F
-#define  ICL_DRAM_WIDTH_MASK			(0x3 << 7)
-#define  ICL_DRAM_WIDTH_SHIFT			7
-#define  ICL_DRAM_WIDTH_X8			(0x0 << 7)
-#define  ICL_DRAM_WIDTH_X16			(0x1 << 7)
-#define  ICL_DRAM_WIDTH_X32			(0x2 << 7)
-#define  ICL_DRAM_RANK_MASK			(0x3 << 9)
-#define  ICL_DRAM_RANK_SHIFT			9
-#define  ICL_DRAM_RANK_1			(0x0 << 9)
-#define  ICL_DRAM_RANK_2			(0x1 << 9)
-#define  ICL_DRAM_RANK_3			(0x2 << 9)
-#define  ICL_DRAM_RANK_4			(0x3 << 9)
-
-#define SA_PERF_STATUS_0_0_0_MCHBAR_PC		_MMIO(MCHBAR_MIRROR_BASE_SNB + 0x5918)
-#define  DG1_QCLK_RATIO_MASK			REG_GENMASK(9, 2)
-#define  DG1_QCLK_REFERENCE			REG_BIT(10)
-
-#define MCHBAR_CH0_CR_TC_PRE_0_0_0_MCHBAR	_MMIO(MCHBAR_MIRROR_BASE_SNB + 0x4000)
-#define   DG1_DRAM_T_RDPRE_MASK			REG_GENMASK(16, 11)
-#define   DG1_DRAM_T_RP_MASK			REG_GENMASK(6, 0)
-#define MCHBAR_CH0_CR_TC_PRE_0_0_0_MCHBAR_HIGH	_MMIO(MCHBAR_MIRROR_BASE_SNB + 0x4004)
-#define   DG1_DRAM_T_RCD_MASK			REG_GENMASK(15, 9)
-#define   DG1_DRAM_T_RAS_MASK			REG_GENMASK(8, 1)
-
-/*
- * Please see hsw_read_dcomp() and hsw_write_dcomp() before using this register,
- * since on HSW we can't write to it using intel_uncore_write.
- */
-#define D_COMP_HSW			_MMIO(MCHBAR_MIRROR_BASE_SNB + 0x5F0C)
 #define D_COMP_BDW			_MMIO(0x138144)
-#define  D_COMP_RCOMP_IN_PROGRESS	(1 << 9)
-#define  D_COMP_COMP_FORCE		(1 << 8)
-#define  D_COMP_COMP_DISABLE		(1 << 0)
 
 /* Pipe WM_LINETIME - watermark line time */
 #define _WM_LINETIME_A		0x45270
@@ -8591,93 +8399,6 @@ enum skl_power_gate {
 #define CGM_PIPE_GAMMA(pipe, i, w)	_MMIO(_PIPE(pipe, _CGM_PIPE_A_GAMMA, _CGM_PIPE_B_GAMMA) + (i) * 8 + (w) * 4)
 #define CGM_PIPE_MODE(pipe)		_MMIO_PIPE(pipe, _CGM_PIPE_A_MODE, _CGM_PIPE_B_MODE)
 
-/* MIPI DSI registers */
-
-#define _MIPI_PORT(port, a, c)	(((port) == PORT_A) ? a : c)	/* ports A and C only */
-#define _MMIO_MIPI(port, a, c)	_MMIO(_MIPI_PORT(port, a, c))
-
-/* Gen11 DSI */
-#define _MMIO_DSI(tc, dsi0, dsi1)	_MMIO_TRANS((tc) - TRANSCODER_DSI_0, \
-						    dsi0, dsi1)
-
-#define MIPIO_TXESC_CLK_DIV1			_MMIO(0x160004)
-#define  GLK_TX_ESC_CLK_DIV1_MASK			0x3FF
-#define MIPIO_TXESC_CLK_DIV2			_MMIO(0x160008)
-#define  GLK_TX_ESC_CLK_DIV2_MASK			0x3FF
-
-#define _ICL_DSI_ESC_CLK_DIV0		0x6b090
-#define _ICL_DSI_ESC_CLK_DIV1		0x6b890
-#define ICL_DSI_ESC_CLK_DIV(port)	_MMIO_PORT((port),	\
-							_ICL_DSI_ESC_CLK_DIV0, \
-							_ICL_DSI_ESC_CLK_DIV1)
-#define _ICL_DPHY_ESC_CLK_DIV0		0x162190
-#define _ICL_DPHY_ESC_CLK_DIV1		0x6C190
-#define ICL_DPHY_ESC_CLK_DIV(port)	_MMIO_PORT((port),	\
-						_ICL_DPHY_ESC_CLK_DIV0, \
-						_ICL_DPHY_ESC_CLK_DIV1)
-#define  ICL_BYTE_CLK_PER_ESC_CLK_MASK		(0x1f << 16)
-#define  ICL_BYTE_CLK_PER_ESC_CLK_SHIFT	16
-#define  ICL_ESC_CLK_DIV_MASK			0x1ff
-#define  ICL_ESC_CLK_DIV_SHIFT			0
-#define DSI_MAX_ESC_CLK			20000		/* in KHz */
-
-#define _ADL_MIPIO_REG			0x180
-#define ADL_MIPIO_DW(port, dw)		_MMIO(_ICL_COMBOPHY(port) + _ADL_MIPIO_REG + 4 * (dw))
-#define   TX_ESC_CLK_DIV_PHY_SEL	REGBIT(16)
-#define   TX_ESC_CLK_DIV_PHY_MASK	REG_GENMASK(23, 16)
-#define   TX_ESC_CLK_DIV_PHY		REG_FIELD_PREP(TX_ESC_CLK_DIV_PHY_MASK, 0x7f)
-
-#define _DSI_CMD_FRMCTL_0		0x6b034
-#define _DSI_CMD_FRMCTL_1		0x6b834
-#define DSI_CMD_FRMCTL(port)		_MMIO_PORT(port,	\
-						   _DSI_CMD_FRMCTL_0,\
-						   _DSI_CMD_FRMCTL_1)
-#define   DSI_FRAME_UPDATE_REQUEST		(1 << 31)
-#define   DSI_PERIODIC_FRAME_UPDATE_ENABLE	(1 << 29)
-#define   DSI_NULL_PACKET_ENABLE		(1 << 28)
-#define   DSI_FRAME_IN_PROGRESS			(1 << 0)
-
-#define _DSI_INTR_MASK_REG_0		0x6b070
-#define _DSI_INTR_MASK_REG_1		0x6b870
-#define DSI_INTR_MASK_REG(port)		_MMIO_PORT(port,	\
-						   _DSI_INTR_MASK_REG_0,\
-						   _DSI_INTR_MASK_REG_1)
-
-#define _DSI_INTR_IDENT_REG_0		0x6b074
-#define _DSI_INTR_IDENT_REG_1		0x6b874
-#define DSI_INTR_IDENT_REG(port)	_MMIO_PORT(port,	\
-						   _DSI_INTR_IDENT_REG_0,\
-						   _DSI_INTR_IDENT_REG_1)
-#define   DSI_TE_EVENT				(1 << 31)
-#define   DSI_RX_DATA_OR_BTA_TERMINATED		(1 << 30)
-#define   DSI_TX_DATA				(1 << 29)
-#define   DSI_ULPS_ENTRY_DONE			(1 << 28)
-#define   DSI_NON_TE_TRIGGER_RECEIVED		(1 << 27)
-#define   DSI_HOST_CHKSUM_ERROR			(1 << 26)
-#define   DSI_HOST_MULTI_ECC_ERROR		(1 << 25)
-#define   DSI_HOST_SINGL_ECC_ERROR		(1 << 24)
-#define   DSI_HOST_CONTENTION_DETECTED		(1 << 23)
-#define   DSI_HOST_FALSE_CONTROL_ERROR		(1 << 22)
-#define   DSI_HOST_TIMEOUT_ERROR		(1 << 21)
-#define   DSI_HOST_LOW_POWER_TX_SYNC_ERROR	(1 << 20)
-#define   DSI_HOST_ESCAPE_MODE_ENTRY_ERROR	(1 << 19)
-#define   DSI_FRAME_UPDATE_DONE			(1 << 16)
-#define   DSI_PROTOCOL_VIOLATION_REPORTED	(1 << 15)
-#define   DSI_INVALID_TX_LENGTH			(1 << 13)
-#define   DSI_INVALID_VC			(1 << 12)
-#define   DSI_INVALID_DATA_TYPE			(1 << 11)
-#define   DSI_PERIPHERAL_CHKSUM_ERROR		(1 << 10)
-#define   DSI_PERIPHERAL_MULTI_ECC_ERROR	(1 << 9)
-#define   DSI_PERIPHERAL_SINGLE_ECC_ERROR	(1 << 8)
-#define   DSI_PERIPHERAL_CONTENTION_DETECTED	(1 << 7)
-#define   DSI_PERIPHERAL_FALSE_CTRL_ERROR	(1 << 6)
-#define   DSI_PERIPHERAL_TIMEOUT_ERROR		(1 << 5)
-#define   DSI_PERIPHERAL_LP_TX_SYNC_ERROR	(1 << 4)
-#define   DSI_PERIPHERAL_ESC_MODE_ENTRY_CMD_ERR	(1 << 3)
-#define   DSI_EOT_SYNC_ERROR			(1 << 2)
-#define   DSI_SOT_SYNC_ERROR			(1 << 1)
-#define   DSI_SOT_ERROR				(1 << 0)
-
 /* Gen4+ Timestamp and Pipe Frame time stamp registers */
 #define GEN4_TIMESTAMP		_MMIO(0x2358)
 #define ILK_TIMESTAMP_HI	_MMIO(0x70070)
@@ -8693,143 +8414,6 @@ enum skl_power_gate {
 #define PIPE_FRMTMSTMP(pipe)		\
 			_MMIO_PIPE2(pipe, _PIPE_FRMTMSTMP_A)
 
-/* BXT MIPI clock controls */
-#define BXT_MAX_VAR_OUTPUT_KHZ			39500
-
-#define BXT_MIPI_CLOCK_CTL			_MMIO(0x46090)
-#define  BXT_MIPI1_DIV_SHIFT			26
-#define  BXT_MIPI2_DIV_SHIFT			10
-#define  BXT_MIPI_DIV_SHIFT(port)		\
-			_MIPI_PORT(port, BXT_MIPI1_DIV_SHIFT, \
-					BXT_MIPI2_DIV_SHIFT)
-
-/* TX control divider to select actual TX clock output from (8x/var) */
-#define  BXT_MIPI1_TX_ESCLK_SHIFT		26
-#define  BXT_MIPI2_TX_ESCLK_SHIFT		10
-#define  BXT_MIPI_TX_ESCLK_SHIFT(port)		\
-			_MIPI_PORT(port, BXT_MIPI1_TX_ESCLK_SHIFT, \
-					BXT_MIPI2_TX_ESCLK_SHIFT)
-#define  BXT_MIPI1_TX_ESCLK_FIXDIV_MASK		(0x3F << 26)
-#define  BXT_MIPI2_TX_ESCLK_FIXDIV_MASK		(0x3F << 10)
-#define  BXT_MIPI_TX_ESCLK_FIXDIV_MASK(port)	\
-			_MIPI_PORT(port, BXT_MIPI1_TX_ESCLK_FIXDIV_MASK, \
-					BXT_MIPI2_TX_ESCLK_FIXDIV_MASK)
-#define  BXT_MIPI_TX_ESCLK_DIVIDER(port, val)	\
-		(((val) & 0x3F) << BXT_MIPI_TX_ESCLK_SHIFT(port))
-/* RX upper control divider to select actual RX clock output from 8x */
-#define  BXT_MIPI1_RX_ESCLK_UPPER_SHIFT		21
-#define  BXT_MIPI2_RX_ESCLK_UPPER_SHIFT		5
-#define  BXT_MIPI_RX_ESCLK_UPPER_SHIFT(port)		\
-			_MIPI_PORT(port, BXT_MIPI1_RX_ESCLK_UPPER_SHIFT, \
-					BXT_MIPI2_RX_ESCLK_UPPER_SHIFT)
-#define  BXT_MIPI1_RX_ESCLK_UPPER_FIXDIV_MASK		(3 << 21)
-#define  BXT_MIPI2_RX_ESCLK_UPPER_FIXDIV_MASK		(3 << 5)
-#define  BXT_MIPI_RX_ESCLK_UPPER_FIXDIV_MASK(port)	\
-			_MIPI_PORT(port, BXT_MIPI1_RX_ESCLK_UPPER_FIXDIV_MASK, \
-					BXT_MIPI2_RX_ESCLK_UPPER_FIXDIV_MASK)
-#define  BXT_MIPI_RX_ESCLK_UPPER_DIVIDER(port, val)	\
-		(((val) & 3) << BXT_MIPI_RX_ESCLK_UPPER_SHIFT(port))
-/* 8/3X divider to select the actual 8/3X clock output from 8x */
-#define  BXT_MIPI1_8X_BY3_SHIFT                19
-#define  BXT_MIPI2_8X_BY3_SHIFT                3
-#define  BXT_MIPI_8X_BY3_SHIFT(port)          \
-			_MIPI_PORT(port, BXT_MIPI1_8X_BY3_SHIFT, \
-					BXT_MIPI2_8X_BY3_SHIFT)
-#define  BXT_MIPI1_8X_BY3_DIVIDER_MASK         (3 << 19)
-#define  BXT_MIPI2_8X_BY3_DIVIDER_MASK         (3 << 3)
-#define  BXT_MIPI_8X_BY3_DIVIDER_MASK(port)    \
-			_MIPI_PORT(port, BXT_MIPI1_8X_BY3_DIVIDER_MASK, \
-						BXT_MIPI2_8X_BY3_DIVIDER_MASK)
-#define  BXT_MIPI_8X_BY3_DIVIDER(port, val)    \
-			(((val) & 3) << BXT_MIPI_8X_BY3_SHIFT(port))
-/* RX lower control divider to select actual RX clock output from 8x */
-#define  BXT_MIPI1_RX_ESCLK_LOWER_SHIFT		16
-#define  BXT_MIPI2_RX_ESCLK_LOWER_SHIFT		0
-#define  BXT_MIPI_RX_ESCLK_LOWER_SHIFT(port)		\
-			_MIPI_PORT(port, BXT_MIPI1_RX_ESCLK_LOWER_SHIFT, \
-					BXT_MIPI2_RX_ESCLK_LOWER_SHIFT)
-#define  BXT_MIPI1_RX_ESCLK_LOWER_FIXDIV_MASK		(3 << 16)
-#define  BXT_MIPI2_RX_ESCLK_LOWER_FIXDIV_MASK		(3 << 0)
-#define  BXT_MIPI_RX_ESCLK_LOWER_FIXDIV_MASK(port)	\
-			_MIPI_PORT(port, BXT_MIPI1_RX_ESCLK_LOWER_FIXDIV_MASK, \
-					BXT_MIPI2_RX_ESCLK_LOWER_FIXDIV_MASK)
-#define  BXT_MIPI_RX_ESCLK_LOWER_DIVIDER(port, val)	\
-		(((val) & 3) << BXT_MIPI_RX_ESCLK_LOWER_SHIFT(port))
-
-#define RX_DIVIDER_BIT_1_2                     0x3
-#define RX_DIVIDER_BIT_3_4                     0xC
-
-/* BXT MIPI mode configure */
-#define  _BXT_MIPIA_TRANS_HACTIVE			0x6B0F8
-#define  _BXT_MIPIC_TRANS_HACTIVE			0x6B8F8
-#define  BXT_MIPI_TRANS_HACTIVE(tc)	_MMIO_MIPI(tc, \
-		_BXT_MIPIA_TRANS_HACTIVE, _BXT_MIPIC_TRANS_HACTIVE)
-
-#define  _BXT_MIPIA_TRANS_VACTIVE			0x6B0FC
-#define  _BXT_MIPIC_TRANS_VACTIVE			0x6B8FC
-#define  BXT_MIPI_TRANS_VACTIVE(tc)	_MMIO_MIPI(tc, \
-		_BXT_MIPIA_TRANS_VACTIVE, _BXT_MIPIC_TRANS_VACTIVE)
-
-#define  _BXT_MIPIA_TRANS_VTOTAL			0x6B100
-#define  _BXT_MIPIC_TRANS_VTOTAL			0x6B900
-#define  BXT_MIPI_TRANS_VTOTAL(tc)	_MMIO_MIPI(tc, \
-		_BXT_MIPIA_TRANS_VTOTAL, _BXT_MIPIC_TRANS_VTOTAL)
-
-#define BXT_DSI_PLL_CTL			_MMIO(0x161000)
-#define  BXT_DSI_PLL_PVD_RATIO_SHIFT	16
-#define  BXT_DSI_PLL_PVD_RATIO_MASK	(3 << BXT_DSI_PLL_PVD_RATIO_SHIFT)
-#define  BXT_DSI_PLL_PVD_RATIO_1	(1 << BXT_DSI_PLL_PVD_RATIO_SHIFT)
-#define  BXT_DSIC_16X_BY1		(0 << 10)
-#define  BXT_DSIC_16X_BY2		(1 << 10)
-#define  BXT_DSIC_16X_BY3		(2 << 10)
-#define  BXT_DSIC_16X_BY4		(3 << 10)
-#define  BXT_DSIC_16X_MASK		(3 << 10)
-#define  BXT_DSIA_16X_BY1		(0 << 8)
-#define  BXT_DSIA_16X_BY2		(1 << 8)
-#define  BXT_DSIA_16X_BY3		(2 << 8)
-#define  BXT_DSIA_16X_BY4		(3 << 8)
-#define  BXT_DSIA_16X_MASK		(3 << 8)
-#define  BXT_DSI_FREQ_SEL_SHIFT		8
-#define  BXT_DSI_FREQ_SEL_MASK		(0xF << BXT_DSI_FREQ_SEL_SHIFT)
-
-#define BXT_DSI_PLL_RATIO_MAX		0x7D
-#define BXT_DSI_PLL_RATIO_MIN		0x22
-#define GLK_DSI_PLL_RATIO_MAX		0x6F
-#define GLK_DSI_PLL_RATIO_MIN		0x22
-#define BXT_DSI_PLL_RATIO_MASK		0xFF
-#define BXT_REF_CLOCK_KHZ		19200
-
-#define BXT_DSI_PLL_ENABLE		_MMIO(0x46080)
-#define  BXT_DSI_PLL_DO_ENABLE		(1 << 31)
-#define  BXT_DSI_PLL_LOCKED		(1 << 30)
-
-#define _MIPIA_PORT_CTRL			(VLV_DISPLAY_BASE + 0x61190)
-#define _MIPIC_PORT_CTRL			(VLV_DISPLAY_BASE + 0x61700)
-#define MIPI_PORT_CTRL(port)	_MMIO_MIPI(port, _MIPIA_PORT_CTRL, _MIPIC_PORT_CTRL)
-
- /* BXT port control */
-#define _BXT_MIPIA_PORT_CTRL				0x6B0C0
-#define _BXT_MIPIC_PORT_CTRL				0x6B8C0
-#define BXT_MIPI_PORT_CTRL(tc)	_MMIO_MIPI(tc, _BXT_MIPIA_PORT_CTRL, _BXT_MIPIC_PORT_CTRL)
-
-/* ICL DSI MODE control */
-#define _ICL_DSI_IO_MODECTL_0				0x6B094
-#define _ICL_DSI_IO_MODECTL_1				0x6B894
-#define ICL_DSI_IO_MODECTL(port)	_MMIO_PORT(port,	\
-						    _ICL_DSI_IO_MODECTL_0, \
-						    _ICL_DSI_IO_MODECTL_1)
-#define  COMBO_PHY_MODE_DSI				(1 << 0)
-
-/* TGL DSI Chicken register */
-#define _TGL_DSI_CHKN_REG_0			0x6B0C0
-#define _TGL_DSI_CHKN_REG_1			0x6B8C0
-#define TGL_DSI_CHKN_REG(port)		_MMIO_PORT(port,	\
-						    _TGL_DSI_CHKN_REG_0, \
-						    _TGL_DSI_CHKN_REG_1)
-#define TGL_DSI_CHKN_LSHS_GB_MASK		REG_GENMASK(15, 12)
-#define TGL_DSI_CHKN_LSHS_GB(byte_clocks)	REG_FIELD_PREP(TGL_DSI_CHKN_LSHS_GB_MASK, \
-							       (byte_clocks))
-
 /* Display Stream Splitter Control */
 #define DSS_CTL1				_MMIO(0x67400)
 #define  SPLITTER_ENABLE			(1 << 31)
@@ -8868,685 +8452,6 @@ enum skl_power_gate {
 							   _ICL_PIPE_DSS_CTL2_PB, \
 							   _ICL_PIPE_DSS_CTL2_PC)
 
-#define BXT_P_DSI_REGULATOR_CFG			_MMIO(0x160020)
-#define  STAP_SELECT					(1 << 0)
-
-#define BXT_P_DSI_REGULATOR_TX_CTRL		_MMIO(0x160054)
-#define  HS_IO_CTRL_SELECT				(1 << 0)
-
-#define  DPI_ENABLE					(1 << 31) /* A + C */
-#define  MIPIA_MIPI4DPHY_DELAY_COUNT_SHIFT		27
-#define  MIPIA_MIPI4DPHY_DELAY_COUNT_MASK		(0xf << 27)
-#define  DUAL_LINK_MODE_SHIFT				26
-#define  DUAL_LINK_MODE_MASK				(1 << 26)
-#define  DUAL_LINK_MODE_FRONT_BACK			(0 << 26)
-#define  DUAL_LINK_MODE_PIXEL_ALTERNATIVE		(1 << 26)
-#define  DITHERING_ENABLE				(1 << 25) /* A + C */
-#define  FLOPPED_HSTX					(1 << 23)
-#define  DE_INVERT					(1 << 19) /* XXX */
-#define  MIPIA_FLISDSI_DELAY_COUNT_SHIFT		18
-#define  MIPIA_FLISDSI_DELAY_COUNT_MASK			(0xf << 18)
-#define  AFE_LATCHOUT					(1 << 17)
-#define  LP_OUTPUT_HOLD					(1 << 16)
-#define  MIPIC_FLISDSI_DELAY_COUNT_HIGH_SHIFT		15
-#define  MIPIC_FLISDSI_DELAY_COUNT_HIGH_MASK		(1 << 15)
-#define  MIPIC_MIPI4DPHY_DELAY_COUNT_SHIFT		11
-#define  MIPIC_MIPI4DPHY_DELAY_COUNT_MASK		(0xf << 11)
-#define  CSB_SHIFT					9
-#define  CSB_MASK					(3 << 9)
-#define  CSB_20MHZ					(0 << 9)
-#define  CSB_10MHZ					(1 << 9)
-#define  CSB_40MHZ					(2 << 9)
-#define  BANDGAP_MASK					(1 << 8)
-#define  BANDGAP_PNW_CIRCUIT				(0 << 8)
-#define  BANDGAP_LNC_CIRCUIT				(1 << 8)
-#define  MIPIC_FLISDSI_DELAY_COUNT_LOW_SHIFT		5
-#define  MIPIC_FLISDSI_DELAY_COUNT_LOW_MASK		(7 << 5)
-#define  TEARING_EFFECT_DELAY				(1 << 4) /* A + C */
-#define  TEARING_EFFECT_SHIFT				2 /* A + C */
-#define  TEARING_EFFECT_MASK				(3 << 2)
-#define  TEARING_EFFECT_OFF				(0 << 2)
-#define  TEARING_EFFECT_DSI				(1 << 2)
-#define  TEARING_EFFECT_GPIO				(2 << 2)
-#define  LANE_CONFIGURATION_SHIFT			0
-#define  LANE_CONFIGURATION_MASK			(3 << 0)
-#define  LANE_CONFIGURATION_4LANE			(0 << 0)
-#define  LANE_CONFIGURATION_DUAL_LINK_A			(1 << 0)
-#define  LANE_CONFIGURATION_DUAL_LINK_B			(2 << 0)
-
-#define _MIPIA_TEARING_CTRL			(VLV_DISPLAY_BASE + 0x61194)
-#define _MIPIC_TEARING_CTRL			(VLV_DISPLAY_BASE + 0x61704)
-#define MIPI_TEARING_CTRL(port)			_MMIO_MIPI(port, _MIPIA_TEARING_CTRL, _MIPIC_TEARING_CTRL)
-#define  TEARING_EFFECT_DELAY_SHIFT			0
-#define  TEARING_EFFECT_DELAY_MASK			(0xffff << 0)
-
-/* XXX: all bits reserved */
-#define _MIPIA_AUTOPWG			(VLV_DISPLAY_BASE + 0x611a0)
-
-/* MIPI DSI Controller and D-PHY registers */
-
-#define _MIPIA_DEVICE_READY		(dev_priv->mipi_mmio_base + 0xb000)
-#define _MIPIC_DEVICE_READY		(dev_priv->mipi_mmio_base + 0xb800)
-#define MIPI_DEVICE_READY(port)		_MMIO_MIPI(port, _MIPIA_DEVICE_READY, _MIPIC_DEVICE_READY)
-#define  BUS_POSSESSION					(1 << 3) /* set to give bus to receiver */
-#define  ULPS_STATE_MASK				(3 << 1)
-#define  ULPS_STATE_ENTER				(2 << 1)
-#define  ULPS_STATE_EXIT				(1 << 1)
-#define  ULPS_STATE_NORMAL_OPERATION			(0 << 1)
-#define  DEVICE_READY					(1 << 0)
-
-#define _MIPIA_INTR_STAT		(dev_priv->mipi_mmio_base + 0xb004)
-#define _MIPIC_INTR_STAT		(dev_priv->mipi_mmio_base + 0xb804)
-#define MIPI_INTR_STAT(port)		_MMIO_MIPI(port, _MIPIA_INTR_STAT, _MIPIC_INTR_STAT)
-#define _MIPIA_INTR_EN			(dev_priv->mipi_mmio_base + 0xb008)
-#define _MIPIC_INTR_EN			(dev_priv->mipi_mmio_base + 0xb808)
-#define MIPI_INTR_EN(port)		_MMIO_MIPI(port, _MIPIA_INTR_EN, _MIPIC_INTR_EN)
-#define  TEARING_EFFECT					(1 << 31)
-#define  SPL_PKT_SENT_INTERRUPT				(1 << 30)
-#define  GEN_READ_DATA_AVAIL				(1 << 29)
-#define  LP_GENERIC_WR_FIFO_FULL			(1 << 28)
-#define  HS_GENERIC_WR_FIFO_FULL			(1 << 27)
-#define  RX_PROT_VIOLATION				(1 << 26)
-#define  RX_INVALID_TX_LENGTH				(1 << 25)
-#define  ACK_WITH_NO_ERROR				(1 << 24)
-#define  TURN_AROUND_ACK_TIMEOUT			(1 << 23)
-#define  LP_RX_TIMEOUT					(1 << 22)
-#define  HS_TX_TIMEOUT					(1 << 21)
-#define  DPI_FIFO_UNDERRUN				(1 << 20)
-#define  LOW_CONTENTION					(1 << 19)
-#define  HIGH_CONTENTION				(1 << 18)
-#define  TXDSI_VC_ID_INVALID				(1 << 17)
-#define  TXDSI_DATA_TYPE_NOT_RECOGNISED			(1 << 16)
-#define  TXCHECKSUM_ERROR				(1 << 15)
-#define  TXECC_MULTIBIT_ERROR				(1 << 14)
-#define  TXECC_SINGLE_BIT_ERROR				(1 << 13)
-#define  TXFALSE_CONTROL_ERROR				(1 << 12)
-#define  RXDSI_VC_ID_INVALID				(1 << 11)
-#define  RXDSI_DATA_TYPE_NOT_REGOGNISED			(1 << 10)
-#define  RXCHECKSUM_ERROR				(1 << 9)
-#define  RXECC_MULTIBIT_ERROR				(1 << 8)
-#define  RXECC_SINGLE_BIT_ERROR				(1 << 7)
-#define  RXFALSE_CONTROL_ERROR				(1 << 6)
-#define  RXHS_RECEIVE_TIMEOUT_ERROR			(1 << 5)
-#define  RX_LP_TX_SYNC_ERROR				(1 << 4)
-#define  RXEXCAPE_MODE_ENTRY_ERROR			(1 << 3)
-#define  RXEOT_SYNC_ERROR				(1 << 2)
-#define  RXSOT_SYNC_ERROR				(1 << 1)
-#define  RXSOT_ERROR					(1 << 0)
-
-#define _MIPIA_DSI_FUNC_PRG		(dev_priv->mipi_mmio_base + 0xb00c)
-#define _MIPIC_DSI_FUNC_PRG		(dev_priv->mipi_mmio_base + 0xb80c)
-#define MIPI_DSI_FUNC_PRG(port)		_MMIO_MIPI(port, _MIPIA_DSI_FUNC_PRG, _MIPIC_DSI_FUNC_PRG)
-#define  CMD_MODE_DATA_WIDTH_MASK			(7 << 13)
-#define  CMD_MODE_NOT_SUPPORTED				(0 << 13)
-#define  CMD_MODE_DATA_WIDTH_16_BIT			(1 << 13)
-#define  CMD_MODE_DATA_WIDTH_9_BIT			(2 << 13)
-#define  CMD_MODE_DATA_WIDTH_8_BIT			(3 << 13)
-#define  CMD_MODE_DATA_WIDTH_OPTION1			(4 << 13)
-#define  CMD_MODE_DATA_WIDTH_OPTION2			(5 << 13)
-#define  VID_MODE_FORMAT_MASK				(0xf << 7)
-#define  VID_MODE_NOT_SUPPORTED				(0 << 7)
-#define  VID_MODE_FORMAT_RGB565				(1 << 7)
-#define  VID_MODE_FORMAT_RGB666_PACKED			(2 << 7)
-#define  VID_MODE_FORMAT_RGB666				(3 << 7)
-#define  VID_MODE_FORMAT_RGB888				(4 << 7)
-#define  CMD_MODE_CHANNEL_NUMBER_SHIFT			5
-#define  CMD_MODE_CHANNEL_NUMBER_MASK			(3 << 5)
-#define  VID_MODE_CHANNEL_NUMBER_SHIFT			3
-#define  VID_MODE_CHANNEL_NUMBER_MASK			(3 << 3)
-#define  DATA_LANES_PRG_REG_SHIFT			0
-#define  DATA_LANES_PRG_REG_MASK			(7 << 0)
-
-#define _MIPIA_HS_TX_TIMEOUT		(dev_priv->mipi_mmio_base + 0xb010)
-#define _MIPIC_HS_TX_TIMEOUT		(dev_priv->mipi_mmio_base + 0xb810)
-#define MIPI_HS_TX_TIMEOUT(port)	_MMIO_MIPI(port, _MIPIA_HS_TX_TIMEOUT, _MIPIC_HS_TX_TIMEOUT)
-#define  HIGH_SPEED_TX_TIMEOUT_COUNTER_MASK		0xffffff
-
-#define _MIPIA_LP_RX_TIMEOUT		(dev_priv->mipi_mmio_base + 0xb014)
-#define _MIPIC_LP_RX_TIMEOUT		(dev_priv->mipi_mmio_base + 0xb814)
-#define MIPI_LP_RX_TIMEOUT(port)	_MMIO_MIPI(port, _MIPIA_LP_RX_TIMEOUT, _MIPIC_LP_RX_TIMEOUT)
-#define  LOW_POWER_RX_TIMEOUT_COUNTER_MASK		0xffffff
-
-#define _MIPIA_TURN_AROUND_TIMEOUT	(dev_priv->mipi_mmio_base + 0xb018)
-#define _MIPIC_TURN_AROUND_TIMEOUT	(dev_priv->mipi_mmio_base + 0xb818)
-#define MIPI_TURN_AROUND_TIMEOUT(port)	_MMIO_MIPI(port, _MIPIA_TURN_AROUND_TIMEOUT, _MIPIC_TURN_AROUND_TIMEOUT)
-#define  TURN_AROUND_TIMEOUT_MASK			0x3f
-
-#define _MIPIA_DEVICE_RESET_TIMER	(dev_priv->mipi_mmio_base + 0xb01c)
-#define _MIPIC_DEVICE_RESET_TIMER	(dev_priv->mipi_mmio_base + 0xb81c)
-#define MIPI_DEVICE_RESET_TIMER(port)	_MMIO_MIPI(port, _MIPIA_DEVICE_RESET_TIMER, _MIPIC_DEVICE_RESET_TIMER)
-#define  DEVICE_RESET_TIMER_MASK			0xffff
-
-#define _MIPIA_DPI_RESOLUTION		(dev_priv->mipi_mmio_base + 0xb020)
-#define _MIPIC_DPI_RESOLUTION		(dev_priv->mipi_mmio_base + 0xb820)
-#define MIPI_DPI_RESOLUTION(port)	_MMIO_MIPI(port, _MIPIA_DPI_RESOLUTION, _MIPIC_DPI_RESOLUTION)
-#define  VERTICAL_ADDRESS_SHIFT				16
-#define  VERTICAL_ADDRESS_MASK				(0xffff << 16)
-#define  HORIZONTAL_ADDRESS_SHIFT			0
-#define  HORIZONTAL_ADDRESS_MASK			0xffff
-
-#define _MIPIA_DBI_FIFO_THROTTLE	(dev_priv->mipi_mmio_base + 0xb024)
-#define _MIPIC_DBI_FIFO_THROTTLE	(dev_priv->mipi_mmio_base + 0xb824)
-#define MIPI_DBI_FIFO_THROTTLE(port)	_MMIO_MIPI(port, _MIPIA_DBI_FIFO_THROTTLE, _MIPIC_DBI_FIFO_THROTTLE)
-#define  DBI_FIFO_EMPTY_HALF				(0 << 0)
-#define  DBI_FIFO_EMPTY_QUARTER				(1 << 0)
-#define  DBI_FIFO_EMPTY_7_LOCATIONS			(2 << 0)
-
-/* regs below are bits 15:0 */
-#define _MIPIA_HSYNC_PADDING_COUNT	(dev_priv->mipi_mmio_base + 0xb028)
-#define _MIPIC_HSYNC_PADDING_COUNT	(dev_priv->mipi_mmio_base + 0xb828)
-#define MIPI_HSYNC_PADDING_COUNT(port)	_MMIO_MIPI(port, _MIPIA_HSYNC_PADDING_COUNT, _MIPIC_HSYNC_PADDING_COUNT)
-
-#define _MIPIA_HBP_COUNT		(dev_priv->mipi_mmio_base + 0xb02c)
-#define _MIPIC_HBP_COUNT		(dev_priv->mipi_mmio_base + 0xb82c)
-#define MIPI_HBP_COUNT(port)		_MMIO_MIPI(port, _MIPIA_HBP_COUNT, _MIPIC_HBP_COUNT)
-
-#define _MIPIA_HFP_COUNT		(dev_priv->mipi_mmio_base + 0xb030)
-#define _MIPIC_HFP_COUNT		(dev_priv->mipi_mmio_base + 0xb830)
-#define MIPI_HFP_COUNT(port)		_MMIO_MIPI(port, _MIPIA_HFP_COUNT, _MIPIC_HFP_COUNT)
-
-#define _MIPIA_HACTIVE_AREA_COUNT	(dev_priv->mipi_mmio_base + 0xb034)
-#define _MIPIC_HACTIVE_AREA_COUNT	(dev_priv->mipi_mmio_base + 0xb834)
-#define MIPI_HACTIVE_AREA_COUNT(port)	_MMIO_MIPI(port, _MIPIA_HACTIVE_AREA_COUNT, _MIPIC_HACTIVE_AREA_COUNT)
-
-#define _MIPIA_VSYNC_PADDING_COUNT	(dev_priv->mipi_mmio_base + 0xb038)
-#define _MIPIC_VSYNC_PADDING_COUNT	(dev_priv->mipi_mmio_base + 0xb838)
-#define MIPI_VSYNC_PADDING_COUNT(port)	_MMIO_MIPI(port, _MIPIA_VSYNC_PADDING_COUNT, _MIPIC_VSYNC_PADDING_COUNT)
-
-#define _MIPIA_VBP_COUNT		(dev_priv->mipi_mmio_base + 0xb03c)
-#define _MIPIC_VBP_COUNT		(dev_priv->mipi_mmio_base + 0xb83c)
-#define MIPI_VBP_COUNT(port)		_MMIO_MIPI(port, _MIPIA_VBP_COUNT, _MIPIC_VBP_COUNT)
-
-#define _MIPIA_VFP_COUNT		(dev_priv->mipi_mmio_base + 0xb040)
-#define _MIPIC_VFP_COUNT		(dev_priv->mipi_mmio_base + 0xb840)
-#define MIPI_VFP_COUNT(port)		_MMIO_MIPI(port, _MIPIA_VFP_COUNT, _MIPIC_VFP_COUNT)
-
-#define _MIPIA_HIGH_LOW_SWITCH_COUNT	(dev_priv->mipi_mmio_base + 0xb044)
-#define _MIPIC_HIGH_LOW_SWITCH_COUNT	(dev_priv->mipi_mmio_base + 0xb844)
-#define MIPI_HIGH_LOW_SWITCH_COUNT(port)	_MMIO_MIPI(port,	_MIPIA_HIGH_LOW_SWITCH_COUNT, _MIPIC_HIGH_LOW_SWITCH_COUNT)
-
-/* regs above are bits 15:0 */
-
-#define _MIPIA_DPI_CONTROL		(dev_priv->mipi_mmio_base + 0xb048)
-#define _MIPIC_DPI_CONTROL		(dev_priv->mipi_mmio_base + 0xb848)
-#define MIPI_DPI_CONTROL(port)		_MMIO_MIPI(port, _MIPIA_DPI_CONTROL, _MIPIC_DPI_CONTROL)
-#define  DPI_LP_MODE					(1 << 6)
-#define  BACKLIGHT_OFF					(1 << 5)
-#define  BACKLIGHT_ON					(1 << 4)
-#define  COLOR_MODE_OFF					(1 << 3)
-#define  COLOR_MODE_ON					(1 << 2)
-#define  TURN_ON					(1 << 1)
-#define  SHUTDOWN					(1 << 0)
-
-#define _MIPIA_DPI_DATA			(dev_priv->mipi_mmio_base + 0xb04c)
-#define _MIPIC_DPI_DATA			(dev_priv->mipi_mmio_base + 0xb84c)
-#define MIPI_DPI_DATA(port)		_MMIO_MIPI(port, _MIPIA_DPI_DATA, _MIPIC_DPI_DATA)
-#define  COMMAND_BYTE_SHIFT				0
-#define  COMMAND_BYTE_MASK				(0x3f << 0)
-
-#define _MIPIA_INIT_COUNT		(dev_priv->mipi_mmio_base + 0xb050)
-#define _MIPIC_INIT_COUNT		(dev_priv->mipi_mmio_base + 0xb850)
-#define MIPI_INIT_COUNT(port)		_MMIO_MIPI(port, _MIPIA_INIT_COUNT, _MIPIC_INIT_COUNT)
-#define  MASTER_INIT_TIMER_SHIFT			0
-#define  MASTER_INIT_TIMER_MASK				(0xffff << 0)
-
-#define _MIPIA_MAX_RETURN_PKT_SIZE	(dev_priv->mipi_mmio_base + 0xb054)
-#define _MIPIC_MAX_RETURN_PKT_SIZE	(dev_priv->mipi_mmio_base + 0xb854)
-#define MIPI_MAX_RETURN_PKT_SIZE(port)	_MMIO_MIPI(port, \
-			_MIPIA_MAX_RETURN_PKT_SIZE, _MIPIC_MAX_RETURN_PKT_SIZE)
-#define  MAX_RETURN_PKT_SIZE_SHIFT			0
-#define  MAX_RETURN_PKT_SIZE_MASK			(0x3ff << 0)
-
-#define _MIPIA_VIDEO_MODE_FORMAT	(dev_priv->mipi_mmio_base + 0xb058)
-#define _MIPIC_VIDEO_MODE_FORMAT	(dev_priv->mipi_mmio_base + 0xb858)
-#define MIPI_VIDEO_MODE_FORMAT(port)	_MMIO_MIPI(port, _MIPIA_VIDEO_MODE_FORMAT, _MIPIC_VIDEO_MODE_FORMAT)
-#define  RANDOM_DPI_DISPLAY_RESOLUTION			(1 << 4)
-#define  DISABLE_VIDEO_BTA				(1 << 3)
-#define  IP_TG_CONFIG					(1 << 2)
-#define  VIDEO_MODE_NON_BURST_WITH_SYNC_PULSE		(1 << 0)
-#define  VIDEO_MODE_NON_BURST_WITH_SYNC_EVENTS		(2 << 0)
-#define  VIDEO_MODE_BURST				(3 << 0)
-
-#define _MIPIA_EOT_DISABLE		(dev_priv->mipi_mmio_base + 0xb05c)
-#define _MIPIC_EOT_DISABLE		(dev_priv->mipi_mmio_base + 0xb85c)
-#define MIPI_EOT_DISABLE(port)		_MMIO_MIPI(port, _MIPIA_EOT_DISABLE, _MIPIC_EOT_DISABLE)
-#define  BXT_DEFEATURE_DPI_FIFO_CTR			(1 << 9)
-#define  BXT_DPHY_DEFEATURE_EN				(1 << 8)
-#define  LP_RX_TIMEOUT_ERROR_RECOVERY_DISABLE		(1 << 7)
-#define  HS_RX_TIMEOUT_ERROR_RECOVERY_DISABLE		(1 << 6)
-#define  LOW_CONTENTION_RECOVERY_DISABLE		(1 << 5)
-#define  HIGH_CONTENTION_RECOVERY_DISABLE		(1 << 4)
-#define  TXDSI_TYPE_NOT_RECOGNISED_ERROR_RECOVERY_DISABLE (1 << 3)
-#define  TXECC_MULTIBIT_ERROR_RECOVERY_DISABLE		(1 << 2)
-#define  CLOCKSTOP					(1 << 1)
-#define  EOT_DISABLE					(1 << 0)
-
-#define _MIPIA_LP_BYTECLK		(dev_priv->mipi_mmio_base + 0xb060)
-#define _MIPIC_LP_BYTECLK		(dev_priv->mipi_mmio_base + 0xb860)
-#define MIPI_LP_BYTECLK(port)		_MMIO_MIPI(port, _MIPIA_LP_BYTECLK, _MIPIC_LP_BYTECLK)
-#define  LP_BYTECLK_SHIFT				0
-#define  LP_BYTECLK_MASK				(0xffff << 0)
-
-#define _MIPIA_TLPX_TIME_COUNT		(dev_priv->mipi_mmio_base + 0xb0a4)
-#define _MIPIC_TLPX_TIME_COUNT		(dev_priv->mipi_mmio_base + 0xb8a4)
-#define MIPI_TLPX_TIME_COUNT(port)	 _MMIO_MIPI(port, _MIPIA_TLPX_TIME_COUNT, _MIPIC_TLPX_TIME_COUNT)
-
-#define _MIPIA_CLK_LANE_TIMING		(dev_priv->mipi_mmio_base + 0xb098)
-#define _MIPIC_CLK_LANE_TIMING		(dev_priv->mipi_mmio_base + 0xb898)
-#define MIPI_CLK_LANE_TIMING(port)	 _MMIO_MIPI(port, _MIPIA_CLK_LANE_TIMING, _MIPIC_CLK_LANE_TIMING)
-
-/* bits 31:0 */
-#define _MIPIA_LP_GEN_DATA		(dev_priv->mipi_mmio_base + 0xb064)
-#define _MIPIC_LP_GEN_DATA		(dev_priv->mipi_mmio_base + 0xb864)
-#define MIPI_LP_GEN_DATA(port)		_MMIO_MIPI(port, _MIPIA_LP_GEN_DATA, _MIPIC_LP_GEN_DATA)
-
-/* bits 31:0 */
-#define _MIPIA_HS_GEN_DATA		(dev_priv->mipi_mmio_base + 0xb068)
-#define _MIPIC_HS_GEN_DATA		(dev_priv->mipi_mmio_base + 0xb868)
-#define MIPI_HS_GEN_DATA(port)		_MMIO_MIPI(port, _MIPIA_HS_GEN_DATA, _MIPIC_HS_GEN_DATA)
-
-#define _MIPIA_LP_GEN_CTRL		(dev_priv->mipi_mmio_base + 0xb06c)
-#define _MIPIC_LP_GEN_CTRL		(dev_priv->mipi_mmio_base + 0xb86c)
-#define MIPI_LP_GEN_CTRL(port)		_MMIO_MIPI(port, _MIPIA_LP_GEN_CTRL, _MIPIC_LP_GEN_CTRL)
-#define _MIPIA_HS_GEN_CTRL		(dev_priv->mipi_mmio_base + 0xb070)
-#define _MIPIC_HS_GEN_CTRL		(dev_priv->mipi_mmio_base + 0xb870)
-#define MIPI_HS_GEN_CTRL(port)		_MMIO_MIPI(port, _MIPIA_HS_GEN_CTRL, _MIPIC_HS_GEN_CTRL)
-#define  LONG_PACKET_WORD_COUNT_SHIFT			8
-#define  LONG_PACKET_WORD_COUNT_MASK			(0xffff << 8)
-#define  SHORT_PACKET_PARAM_SHIFT			8
-#define  SHORT_PACKET_PARAM_MASK			(0xffff << 8)
-#define  VIRTUAL_CHANNEL_SHIFT				6
-#define  VIRTUAL_CHANNEL_MASK				(3 << 6)
-#define  DATA_TYPE_SHIFT				0
-#define  DATA_TYPE_MASK					(0x3f << 0)
-/* data type values, see include/video/mipi_display.h */
-
-#define _MIPIA_GEN_FIFO_STAT		(dev_priv->mipi_mmio_base + 0xb074)
-#define _MIPIC_GEN_FIFO_STAT		(dev_priv->mipi_mmio_base + 0xb874)
-#define MIPI_GEN_FIFO_STAT(port)	_MMIO_MIPI(port, _MIPIA_GEN_FIFO_STAT, _MIPIC_GEN_FIFO_STAT)
-#define  DPI_FIFO_EMPTY					(1 << 28)
-#define  DBI_FIFO_EMPTY					(1 << 27)
-#define  LP_CTRL_FIFO_EMPTY				(1 << 26)
-#define  LP_CTRL_FIFO_HALF_EMPTY			(1 << 25)
-#define  LP_CTRL_FIFO_FULL				(1 << 24)
-#define  HS_CTRL_FIFO_EMPTY				(1 << 18)
-#define  HS_CTRL_FIFO_HALF_EMPTY			(1 << 17)
-#define  HS_CTRL_FIFO_FULL				(1 << 16)
-#define  LP_DATA_FIFO_EMPTY				(1 << 10)
-#define  LP_DATA_FIFO_HALF_EMPTY			(1 << 9)
-#define  LP_DATA_FIFO_FULL				(1 << 8)
-#define  HS_DATA_FIFO_EMPTY				(1 << 2)
-#define  HS_DATA_FIFO_HALF_EMPTY			(1 << 1)
-#define  HS_DATA_FIFO_FULL				(1 << 0)
-
-#define _MIPIA_HS_LS_DBI_ENABLE		(dev_priv->mipi_mmio_base + 0xb078)
-#define _MIPIC_HS_LS_DBI_ENABLE		(dev_priv->mipi_mmio_base + 0xb878)
-#define MIPI_HS_LP_DBI_ENABLE(port)	_MMIO_MIPI(port, _MIPIA_HS_LS_DBI_ENABLE, _MIPIC_HS_LS_DBI_ENABLE)
-#define  DBI_HS_LP_MODE_MASK				(1 << 0)
-#define  DBI_LP_MODE					(1 << 0)
-#define  DBI_HS_MODE					(0 << 0)
-
-#define _MIPIA_DPHY_PARAM		(dev_priv->mipi_mmio_base + 0xb080)
-#define _MIPIC_DPHY_PARAM		(dev_priv->mipi_mmio_base + 0xb880)
-#define MIPI_DPHY_PARAM(port)		_MMIO_MIPI(port, _MIPIA_DPHY_PARAM, _MIPIC_DPHY_PARAM)
-#define  EXIT_ZERO_COUNT_SHIFT				24
-#define  EXIT_ZERO_COUNT_MASK				(0x3f << 24)
-#define  TRAIL_COUNT_SHIFT				16
-#define  TRAIL_COUNT_MASK				(0x1f << 16)
-#define  CLK_ZERO_COUNT_SHIFT				8
-#define  CLK_ZERO_COUNT_MASK				(0xff << 8)
-#define  PREPARE_COUNT_SHIFT				0
-#define  PREPARE_COUNT_MASK				(0x3f << 0)
-
-#define _ICL_DSI_T_INIT_MASTER_0	0x6b088
-#define _ICL_DSI_T_INIT_MASTER_1	0x6b888
-#define ICL_DSI_T_INIT_MASTER(port)	_MMIO_PORT(port,	\
-						   _ICL_DSI_T_INIT_MASTER_0,\
-						   _ICL_DSI_T_INIT_MASTER_1)
-
-#define _DPHY_CLK_TIMING_PARAM_0	0x162180
-#define _DPHY_CLK_TIMING_PARAM_1	0x6c180
-#define DPHY_CLK_TIMING_PARAM(port)	_MMIO_PORT(port,	\
-						   _DPHY_CLK_TIMING_PARAM_0,\
-						   _DPHY_CLK_TIMING_PARAM_1)
-#define _DSI_CLK_TIMING_PARAM_0		0x6b080
-#define _DSI_CLK_TIMING_PARAM_1		0x6b880
-#define DSI_CLK_TIMING_PARAM(port)	_MMIO_PORT(port,	\
-						   _DSI_CLK_TIMING_PARAM_0,\
-						   _DSI_CLK_TIMING_PARAM_1)
-#define  CLK_PREPARE_OVERRIDE		(1 << 31)
-#define  CLK_PREPARE(x)		((x) << 28)
-#define  CLK_PREPARE_MASK		(0x7 << 28)
-#define  CLK_PREPARE_SHIFT		28
-#define  CLK_ZERO_OVERRIDE		(1 << 27)
-#define  CLK_ZERO(x)			((x) << 20)
-#define  CLK_ZERO_MASK			(0xf << 20)
-#define  CLK_ZERO_SHIFT		20
-#define  CLK_PRE_OVERRIDE		(1 << 19)
-#define  CLK_PRE(x)			((x) << 16)
-#define  CLK_PRE_MASK			(0x3 << 16)
-#define  CLK_PRE_SHIFT			16
-#define  CLK_POST_OVERRIDE		(1 << 15)
-#define  CLK_POST(x)			((x) << 8)
-#define  CLK_POST_MASK			(0x7 << 8)
-#define  CLK_POST_SHIFT		8
-#define  CLK_TRAIL_OVERRIDE		(1 << 7)
-#define  CLK_TRAIL(x)			((x) << 0)
-#define  CLK_TRAIL_MASK		(0xf << 0)
-#define  CLK_TRAIL_SHIFT		0
-
-#define _DPHY_DATA_TIMING_PARAM_0	0x162184
-#define _DPHY_DATA_TIMING_PARAM_1	0x6c184
-#define DPHY_DATA_TIMING_PARAM(port)	_MMIO_PORT(port,	\
-						   _DPHY_DATA_TIMING_PARAM_0,\
-						   _DPHY_DATA_TIMING_PARAM_1)
-#define _DSI_DATA_TIMING_PARAM_0	0x6B084
-#define _DSI_DATA_TIMING_PARAM_1	0x6B884
-#define DSI_DATA_TIMING_PARAM(port)	_MMIO_PORT(port,	\
-						   _DSI_DATA_TIMING_PARAM_0,\
-						   _DSI_DATA_TIMING_PARAM_1)
-#define  HS_PREPARE_OVERRIDE		(1 << 31)
-#define  HS_PREPARE(x)			((x) << 24)
-#define  HS_PREPARE_MASK		(0x7 << 24)
-#define  HS_PREPARE_SHIFT		24
-#define  HS_ZERO_OVERRIDE		(1 << 23)
-#define  HS_ZERO(x)			((x) << 16)
-#define  HS_ZERO_MASK			(0xf << 16)
-#define  HS_ZERO_SHIFT			16
-#define  HS_TRAIL_OVERRIDE		(1 << 15)
-#define  HS_TRAIL(x)			((x) << 8)
-#define  HS_TRAIL_MASK			(0x7 << 8)
-#define  HS_TRAIL_SHIFT		8
-#define  HS_EXIT_OVERRIDE		(1 << 7)
-#define  HS_EXIT(x)			((x) << 0)
-#define  HS_EXIT_MASK			(0x7 << 0)
-#define  HS_EXIT_SHIFT			0
-
-#define _DPHY_TA_TIMING_PARAM_0		0x162188
-#define _DPHY_TA_TIMING_PARAM_1		0x6c188
-#define DPHY_TA_TIMING_PARAM(port)	_MMIO_PORT(port,	\
-						   _DPHY_TA_TIMING_PARAM_0,\
-						   _DPHY_TA_TIMING_PARAM_1)
-#define _DSI_TA_TIMING_PARAM_0		0x6b098
-#define _DSI_TA_TIMING_PARAM_1		0x6b898
-#define DSI_TA_TIMING_PARAM(port)	_MMIO_PORT(port,	\
-						   _DSI_TA_TIMING_PARAM_0,\
-						   _DSI_TA_TIMING_PARAM_1)
-#define  TA_SURE_OVERRIDE		(1 << 31)
-#define  TA_SURE(x)			((x) << 16)
-#define  TA_SURE_MASK			(0x1f << 16)
-#define  TA_SURE_SHIFT			16
-#define  TA_GO_OVERRIDE		(1 << 15)
-#define  TA_GO(x)			((x) << 8)
-#define  TA_GO_MASK			(0xf << 8)
-#define  TA_GO_SHIFT			8
-#define  TA_GET_OVERRIDE		(1 << 7)
-#define  TA_GET(x)			((x) << 0)
-#define  TA_GET_MASK			(0xf << 0)
-#define  TA_GET_SHIFT			0
-
-/* DSI transcoder configuration */
-#define _DSI_TRANS_FUNC_CONF_0		0x6b030
-#define _DSI_TRANS_FUNC_CONF_1		0x6b830
-#define DSI_TRANS_FUNC_CONF(tc)		_MMIO_DSI(tc,	\
-						  _DSI_TRANS_FUNC_CONF_0,\
-						  _DSI_TRANS_FUNC_CONF_1)
-#define  OP_MODE_MASK			(0x3 << 28)
-#define  OP_MODE_SHIFT			28
-#define  CMD_MODE_NO_GATE		(0x0 << 28)
-#define  CMD_MODE_TE_GATE		(0x1 << 28)
-#define  VIDEO_MODE_SYNC_EVENT		(0x2 << 28)
-#define  VIDEO_MODE_SYNC_PULSE		(0x3 << 28)
-#define  TE_SOURCE_GPIO			(1 << 27)
-#define  LINK_READY			(1 << 20)
-#define  PIX_FMT_MASK			(0x3 << 16)
-#define  PIX_FMT_SHIFT			16
-#define  PIX_FMT_RGB565			(0x0 << 16)
-#define  PIX_FMT_RGB666_PACKED		(0x1 << 16)
-#define  PIX_FMT_RGB666_LOOSE		(0x2 << 16)
-#define  PIX_FMT_RGB888			(0x3 << 16)
-#define  PIX_FMT_RGB101010		(0x4 << 16)
-#define  PIX_FMT_RGB121212		(0x5 << 16)
-#define  PIX_FMT_COMPRESSED		(0x6 << 16)
-#define  BGR_TRANSMISSION		(1 << 15)
-#define  PIX_VIRT_CHAN(x)		((x) << 12)
-#define  PIX_VIRT_CHAN_MASK		(0x3 << 12)
-#define  PIX_VIRT_CHAN_SHIFT		12
-#define  PIX_BUF_THRESHOLD_MASK		(0x3 << 10)
-#define  PIX_BUF_THRESHOLD_SHIFT	10
-#define  PIX_BUF_THRESHOLD_1_4		(0x0 << 10)
-#define  PIX_BUF_THRESHOLD_1_2		(0x1 << 10)
-#define  PIX_BUF_THRESHOLD_3_4		(0x2 << 10)
-#define  PIX_BUF_THRESHOLD_FULL		(0x3 << 10)
-#define  CONTINUOUS_CLK_MASK		(0x3 << 8)
-#define  CONTINUOUS_CLK_SHIFT		8
-#define  CLK_ENTER_LP_AFTER_DATA	(0x0 << 8)
-#define  CLK_HS_OR_LP			(0x2 << 8)
-#define  CLK_HS_CONTINUOUS		(0x3 << 8)
-#define  LINK_CALIBRATION_MASK		(0x3 << 4)
-#define  LINK_CALIBRATION_SHIFT		4
-#define  CALIBRATION_DISABLED		(0x0 << 4)
-#define  CALIBRATION_ENABLED_INITIAL_ONLY	(0x2 << 4)
-#define  CALIBRATION_ENABLED_INITIAL_PERIODIC	(0x3 << 4)
-#define  BLANKING_PACKET_ENABLE		(1 << 2)
-#define  S3D_ORIENTATION_LANDSCAPE	(1 << 1)
-#define  EOTP_DISABLED			(1 << 0)
-
-#define _DSI_CMD_RXCTL_0		0x6b0d4
-#define _DSI_CMD_RXCTL_1		0x6b8d4
-#define DSI_CMD_RXCTL(tc)		_MMIO_DSI(tc,	\
-						  _DSI_CMD_RXCTL_0,\
-						  _DSI_CMD_RXCTL_1)
-#define  READ_UNLOADS_DW		(1 << 16)
-#define  RECEIVED_UNASSIGNED_TRIGGER	(1 << 15)
-#define  RECEIVED_ACKNOWLEDGE_TRIGGER	(1 << 14)
-#define  RECEIVED_TEAR_EFFECT_TRIGGER	(1 << 13)
-#define  RECEIVED_RESET_TRIGGER		(1 << 12)
-#define  RECEIVED_PAYLOAD_WAS_LOST	(1 << 11)
-#define  RECEIVED_CRC_WAS_LOST		(1 << 10)
-#define  NUMBER_RX_PLOAD_DW_MASK	(0xff << 0)
-#define  NUMBER_RX_PLOAD_DW_SHIFT	0
-
-#define _DSI_CMD_TXCTL_0		0x6b0d0
-#define _DSI_CMD_TXCTL_1		0x6b8d0
-#define DSI_CMD_TXCTL(tc)		_MMIO_DSI(tc,	\
-						  _DSI_CMD_TXCTL_0,\
-						  _DSI_CMD_TXCTL_1)
-#define  KEEP_LINK_IN_HS		(1 << 24)
-#define  FREE_HEADER_CREDIT_MASK	(0x1f << 8)
-#define  FREE_HEADER_CREDIT_SHIFT	0x8
-#define  FREE_PLOAD_CREDIT_MASK		(0xff << 0)
-#define  FREE_PLOAD_CREDIT_SHIFT	0
-#define  MAX_HEADER_CREDIT		0x10
-#define  MAX_PLOAD_CREDIT		0x40
-
-#define _DSI_CMD_TXHDR_0		0x6b100
-#define _DSI_CMD_TXHDR_1		0x6b900
-#define DSI_CMD_TXHDR(tc)		_MMIO_DSI(tc,	\
-						  _DSI_CMD_TXHDR_0,\
-						  _DSI_CMD_TXHDR_1)
-#define  PAYLOAD_PRESENT		(1 << 31)
-#define  LP_DATA_TRANSFER		(1 << 30)
-#define  VBLANK_FENCE			(1 << 29)
-#define  PARAM_WC_MASK			(0xffff << 8)
-#define  PARAM_WC_LOWER_SHIFT		8
-#define  PARAM_WC_UPPER_SHIFT		16
-#define  VC_MASK			(0x3 << 6)
-#define  VC_SHIFT			6
-#define  DT_MASK			(0x3f << 0)
-#define  DT_SHIFT			0
-
-#define _DSI_CMD_TXPYLD_0		0x6b104
-#define _DSI_CMD_TXPYLD_1		0x6b904
-#define DSI_CMD_TXPYLD(tc)		_MMIO_DSI(tc,	\
-						  _DSI_CMD_TXPYLD_0,\
-						  _DSI_CMD_TXPYLD_1)
-
-#define _DSI_LP_MSG_0			0x6b0d8
-#define _DSI_LP_MSG_1			0x6b8d8
-#define DSI_LP_MSG(tc)			_MMIO_DSI(tc,	\
-						  _DSI_LP_MSG_0,\
-						  _DSI_LP_MSG_1)
-#define  LPTX_IN_PROGRESS		(1 << 17)
-#define  LINK_IN_ULPS			(1 << 16)
-#define  LINK_ULPS_TYPE_LP11		(1 << 8)
-#define  LINK_ENTER_ULPS		(1 << 0)
-
-/* DSI timeout registers */
-#define _DSI_HSTX_TO_0			0x6b044
-#define _DSI_HSTX_TO_1			0x6b844
-#define DSI_HSTX_TO(tc)			_MMIO_DSI(tc,	\
-						  _DSI_HSTX_TO_0,\
-						  _DSI_HSTX_TO_1)
-#define  HSTX_TIMEOUT_VALUE_MASK	(0xffff << 16)
-#define  HSTX_TIMEOUT_VALUE_SHIFT	16
-#define  HSTX_TIMEOUT_VALUE(x)		((x) << 16)
-#define  HSTX_TIMED_OUT			(1 << 0)
-
-#define _DSI_LPRX_HOST_TO_0		0x6b048
-#define _DSI_LPRX_HOST_TO_1		0x6b848
-#define DSI_LPRX_HOST_TO(tc)		_MMIO_DSI(tc,	\
-						  _DSI_LPRX_HOST_TO_0,\
-						  _DSI_LPRX_HOST_TO_1)
-#define  LPRX_TIMED_OUT			(1 << 16)
-#define  LPRX_TIMEOUT_VALUE_MASK	(0xffff << 0)
-#define  LPRX_TIMEOUT_VALUE_SHIFT	0
-#define  LPRX_TIMEOUT_VALUE(x)		((x) << 0)
-
-#define _DSI_PWAIT_TO_0			0x6b040
-#define _DSI_PWAIT_TO_1			0x6b840
-#define DSI_PWAIT_TO(tc)		_MMIO_DSI(tc,	\
-						  _DSI_PWAIT_TO_0,\
-						  _DSI_PWAIT_TO_1)
-#define  PRESET_TIMEOUT_VALUE_MASK	(0xffff << 16)
-#define  PRESET_TIMEOUT_VALUE_SHIFT	16
-#define  PRESET_TIMEOUT_VALUE(x)	((x) << 16)
-#define  PRESPONSE_TIMEOUT_VALUE_MASK	(0xffff << 0)
-#define  PRESPONSE_TIMEOUT_VALUE_SHIFT	0
-#define  PRESPONSE_TIMEOUT_VALUE(x)	((x) << 0)
-
-#define _DSI_TA_TO_0			0x6b04c
-#define _DSI_TA_TO_1			0x6b84c
-#define DSI_TA_TO(tc)			_MMIO_DSI(tc,	\
-						  _DSI_TA_TO_0,\
-						  _DSI_TA_TO_1)
-#define  TA_TIMED_OUT			(1 << 16)
-#define  TA_TIMEOUT_VALUE_MASK		(0xffff << 0)
-#define  TA_TIMEOUT_VALUE_SHIFT		0
-#define  TA_TIMEOUT_VALUE(x)		((x) << 0)
-
-/* bits 31:0 */
-#define _MIPIA_DBI_BW_CTRL		(dev_priv->mipi_mmio_base + 0xb084)
-#define _MIPIC_DBI_BW_CTRL		(dev_priv->mipi_mmio_base + 0xb884)
-#define MIPI_DBI_BW_CTRL(port)		_MMIO_MIPI(port, _MIPIA_DBI_BW_CTRL, _MIPIC_DBI_BW_CTRL)
-
-#define _MIPIA_CLK_LANE_SWITCH_TIME_CNT		(dev_priv->mipi_mmio_base + 0xb088)
-#define _MIPIC_CLK_LANE_SWITCH_TIME_CNT		(dev_priv->mipi_mmio_base + 0xb888)
-#define MIPI_CLK_LANE_SWITCH_TIME_CNT(port)	_MMIO_MIPI(port, _MIPIA_CLK_LANE_SWITCH_TIME_CNT, _MIPIC_CLK_LANE_SWITCH_TIME_CNT)
-#define  LP_HS_SSW_CNT_SHIFT				16
-#define  LP_HS_SSW_CNT_MASK				(0xffff << 16)
-#define  HS_LP_PWR_SW_CNT_SHIFT				0
-#define  HS_LP_PWR_SW_CNT_MASK				(0xffff << 0)
-
-#define _MIPIA_STOP_STATE_STALL		(dev_priv->mipi_mmio_base + 0xb08c)
-#define _MIPIC_STOP_STATE_STALL		(dev_priv->mipi_mmio_base + 0xb88c)
-#define MIPI_STOP_STATE_STALL(port)	_MMIO_MIPI(port, _MIPIA_STOP_STATE_STALL, _MIPIC_STOP_STATE_STALL)
-#define  STOP_STATE_STALL_COUNTER_SHIFT			0
-#define  STOP_STATE_STALL_COUNTER_MASK			(0xff << 0)
-
-#define _MIPIA_INTR_STAT_REG_1		(dev_priv->mipi_mmio_base + 0xb090)
-#define _MIPIC_INTR_STAT_REG_1		(dev_priv->mipi_mmio_base + 0xb890)
-#define MIPI_INTR_STAT_REG_1(port)	_MMIO_MIPI(port, _MIPIA_INTR_STAT_REG_1, _MIPIC_INTR_STAT_REG_1)
-#define _MIPIA_INTR_EN_REG_1		(dev_priv->mipi_mmio_base + 0xb094)
-#define _MIPIC_INTR_EN_REG_1		(dev_priv->mipi_mmio_base + 0xb894)
-#define MIPI_INTR_EN_REG_1(port)	_MMIO_MIPI(port, _MIPIA_INTR_EN_REG_1, _MIPIC_INTR_EN_REG_1)
-#define  RX_CONTENTION_DETECTED				(1 << 0)
-
-/* XXX: only pipe A ?!? */
-#define MIPIA_DBI_TYPEC_CTRL		(dev_priv->mipi_mmio_base + 0xb100)
-#define  DBI_TYPEC_ENABLE				(1 << 31)
-#define  DBI_TYPEC_WIP					(1 << 30)
-#define  DBI_TYPEC_OPTION_SHIFT				28
-#define  DBI_TYPEC_OPTION_MASK				(3 << 28)
-#define  DBI_TYPEC_FREQ_SHIFT				24
-#define  DBI_TYPEC_FREQ_MASK				(0xf << 24)
-#define  DBI_TYPEC_OVERRIDE				(1 << 8)
-#define  DBI_TYPEC_OVERRIDE_COUNTER_SHIFT		0
-#define  DBI_TYPEC_OVERRIDE_COUNTER_MASK		(0xff << 0)
-
-
-/* MIPI adapter registers */
-
-#define _MIPIA_CTRL			(dev_priv->mipi_mmio_base + 0xb104)
-#define _MIPIC_CTRL			(dev_priv->mipi_mmio_base + 0xb904)
-#define MIPI_CTRL(port)			_MMIO_MIPI(port, _MIPIA_CTRL, _MIPIC_CTRL)
-#define  ESCAPE_CLOCK_DIVIDER_SHIFT			5 /* A only */
-#define  ESCAPE_CLOCK_DIVIDER_MASK			(3 << 5)
-#define  ESCAPE_CLOCK_DIVIDER_1				(0 << 5)
-#define  ESCAPE_CLOCK_DIVIDER_2				(1 << 5)
-#define  ESCAPE_CLOCK_DIVIDER_4				(2 << 5)
-#define  READ_REQUEST_PRIORITY_SHIFT			3
-#define  READ_REQUEST_PRIORITY_MASK			(3 << 3)
-#define  READ_REQUEST_PRIORITY_LOW			(0 << 3)
-#define  READ_REQUEST_PRIORITY_HIGH			(3 << 3)
-#define  RGB_FLIP_TO_BGR				(1 << 2)
-
-#define  BXT_PIPE_SELECT_SHIFT				7
-#define  BXT_PIPE_SELECT_MASK				(7 << 7)
-#define  BXT_PIPE_SELECT(pipe)				((pipe) << 7)
-#define  GLK_PHY_STATUS_PORT_READY			(1 << 31) /* RO */
-#define  GLK_ULPS_NOT_ACTIVE				(1 << 30) /* RO */
-#define  GLK_MIPIIO_RESET_RELEASED			(1 << 28)
-#define  GLK_CLOCK_LANE_STOP_STATE			(1 << 27) /* RO */
-#define  GLK_DATA_LANE_STOP_STATE			(1 << 26) /* RO */
-#define  GLK_LP_WAKE					(1 << 22)
-#define  GLK_LP11_LOW_PWR_MODE				(1 << 21)
-#define  GLK_LP00_LOW_PWR_MODE				(1 << 20)
-#define  GLK_FIREWALL_ENABLE				(1 << 16)
-#define  BXT_PIXEL_OVERLAP_CNT_MASK			(0xf << 10)
-#define  BXT_PIXEL_OVERLAP_CNT_SHIFT			10
-#define  BXT_DSC_ENABLE					(1 << 3)
-#define  BXT_RGB_FLIP					(1 << 2)
-#define  GLK_MIPIIO_PORT_POWERED			(1 << 1) /* RO */
-#define  GLK_MIPIIO_ENABLE				(1 << 0)
-
-#define _MIPIA_DATA_ADDRESS		(dev_priv->mipi_mmio_base + 0xb108)
-#define _MIPIC_DATA_ADDRESS		(dev_priv->mipi_mmio_base + 0xb908)
-#define MIPI_DATA_ADDRESS(port)		_MMIO_MIPI(port, _MIPIA_DATA_ADDRESS, _MIPIC_DATA_ADDRESS)
-#define  DATA_MEM_ADDRESS_SHIFT				5
-#define  DATA_MEM_ADDRESS_MASK				(0x7ffffff << 5)
-#define  DATA_VALID					(1 << 0)
-
-#define _MIPIA_DATA_LENGTH		(dev_priv->mipi_mmio_base + 0xb10c)
-#define _MIPIC_DATA_LENGTH		(dev_priv->mipi_mmio_base + 0xb90c)
-#define MIPI_DATA_LENGTH(port)		_MMIO_MIPI(port, _MIPIA_DATA_LENGTH, _MIPIC_DATA_LENGTH)
-#define  DATA_LENGTH_SHIFT				0
-#define  DATA_LENGTH_MASK				(0xfffff << 0)
-
-#define _MIPIA_COMMAND_ADDRESS		(dev_priv->mipi_mmio_base + 0xb110)
-#define _MIPIC_COMMAND_ADDRESS		(dev_priv->mipi_mmio_base + 0xb910)
-#define MIPI_COMMAND_ADDRESS(port)	_MMIO_MIPI(port, _MIPIA_COMMAND_ADDRESS, _MIPIC_COMMAND_ADDRESS)
-#define  COMMAND_MEM_ADDRESS_SHIFT			5
-#define  COMMAND_MEM_ADDRESS_MASK			(0x7ffffff << 5)
-#define  AUTO_PWG_ENABLE				(1 << 2)
-#define  MEMORY_WRITE_DATA_FROM_PIPE_RENDERING		(1 << 1)
-#define  COMMAND_VALID					(1 << 0)
-
-#define _MIPIA_COMMAND_LENGTH		(dev_priv->mipi_mmio_base + 0xb114)
-#define _MIPIC_COMMAND_LENGTH		(dev_priv->mipi_mmio_base + 0xb914)
-#define MIPI_COMMAND_LENGTH(port)	_MMIO_MIPI(port, _MIPIA_COMMAND_LENGTH, _MIPIC_COMMAND_LENGTH)
-#define  COMMAND_LENGTH_SHIFT(n)			(8 * (n)) /* n: 0...3 */
-#define  COMMAND_LENGTH_MASK(n)				(0xff << (8 * (n)))
-
-#define _MIPIA_READ_DATA_RETURN0	(dev_priv->mipi_mmio_base + 0xb118)
-#define _MIPIC_READ_DATA_RETURN0	(dev_priv->mipi_mmio_base + 0xb918)
-#define MIPI_READ_DATA_RETURN(port, n) _MMIO(_MIPI(port, _MIPIA_READ_DATA_RETURN0, _MIPIC_READ_DATA_RETURN0) + 4 * (n)) /* n: 0...7 */
-
-#define _MIPIA_READ_DATA_VALID		(dev_priv->mipi_mmio_base + 0xb138)
-#define _MIPIC_READ_DATA_VALID		(dev_priv->mipi_mmio_base + 0xb938)
-#define MIPI_READ_DATA_VALID(port)	_MMIO_MIPI(port, _MIPIA_READ_DATA_VALID, _MIPIC_READ_DATA_VALID)
-#define  READ_DATA_VALID(n)				(1 << (n))
-
 #define GEN12_GSMBASE			_MMIO(0x108100)
 #define GEN12_DSMBASE			_MMIO(0x1080C0)
 
@@ -9557,8 +8462,10 @@ enum skl_power_gate {
 
 #define _ICL_PHY_MISC_A		0x64C00
 #define _ICL_PHY_MISC_B		0x64C04
-#define ICL_PHY_MISC(port)	_MMIO_PORT(port, _ICL_PHY_MISC_A, \
-						 _ICL_PHY_MISC_B)
+#define _DG2_PHY_MISC_TC1	0x64C14 /* TC1="PHY E" but offset as if "PHY F" */
+#define ICL_PHY_MISC(port)	_MMIO_PORT(port, _ICL_PHY_MISC_A, _ICL_PHY_MISC_B)
+#define DG2_PHY_MISC(port)	((port) == PHY_E ? _MMIO(_DG2_PHY_MISC_TC1) : \
+				 ICL_PHY_MISC(port))
 #define  ICL_PHY_MISC_MUX_DDID			(1 << 28)
 #define  ICL_PHY_MISC_DE_IO_COMP_PWR_DOWN	(1 << 23)
 #define  DG2_PHY_DP_TX_ACK_MASK			REG_GENMASK(23, 20)
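
A minimal sketch (not part of this patch) of how DG2_PHY_MISC() stands in for
ICL_PHY_MISC() at call sites; intel_de_rmw() is the existing display register
helper, while the wrapper function itself is made up:

/* Hypothetical wrapper, for illustration only. */
static void example_phy_misc_pwr_down(struct drm_i915_private *i915,
				      enum phy phy)
{
	/* DG2_PHY_MISC() maps TC1 ("PHY E") to the PHY F register offset */
	intel_de_rmw(i915, DG2_PHY_MISC(phy), 0,
		     ICL_PHY_MISC_DE_IO_COMP_PWR_DOWN);
}
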
@@ -9928,4 +8835,8 @@ enum skl_power_gate {
 #define CLKGATE_DIS_MISC			_MMIO(0x46534)
 #define  CLKGATE_DIS_MISC_DMASC_GATING_DIS	REG_BIT(21)
 
+#define GEN12_CULLBIT1			_MMIO(0x6100)
+#define GEN12_CULLBIT2			_MMIO(0x7030)
+#define GEN12_STATE_ACK_DEBUG		_MMIO(0x20BC)
+
 #endif /* _I915_REG_H_ */
diff --git a/drivers/gpu/drm/i915/i915_reg_defs.h b/drivers/gpu/drm/i915/i915_reg_defs.h
index 34d37bbf08cdebf80634e64594d23cfe8740d89e..d78d78fce43143f581936b9553bc214503c4f4d8 100644
--- a/drivers/gpu/drm/i915/i915_reg_defs.h
+++ b/drivers/gpu/drm/i915/i915_reg_defs.h
@@ -37,6 +37,21 @@
 				 __is_constexpr(__low) &&		\
 				 ((__low) < 0 || (__high) > 31 || (__low) > (__high)))))
 
+/**
+ * REG_GENMASK64() - Prepare a continuous u64 bitmask
+ * @__high: 0-based high bit
+ * @__low: 0-based low bit
+ *
+ * Local wrapper for GENMASK_ULL() to force u64, with compile-time checks.
+ *
+ * @return: Continuous bitmask from @__high to @__low, inclusive.
+ */
+#define REG_GENMASK64(__high, __low)					\
+	((u64)(GENMASK_ULL(__high, __low) +				\
+	       BUILD_BUG_ON_ZERO(__is_constexpr(__high) &&		\
+				 __is_constexpr(__low) &&		\
+				 ((__low) < 0 || (__high) > 63 || (__low) > (__high)))))
+
 /*
  * Local integer constant expression version of is_power_of_2().
  */
@@ -71,6 +86,18 @@
  */
 #define REG_FIELD_GET(__mask, __val)	((u32)FIELD_GET(__mask, __val))
 
+/**
+ * REG_FIELD_GET64() - Extract a u64 bitfield value
+ * @__mask: shifted mask defining the field's length and position
+ * @__val: value to extract the bitfield value from
+ *
+ * Local wrapper for FIELD_GET() to force u64 and for consistency with
+ * REG_GENMASK64().
+ *
+ * @return: Masked and shifted value of the field defined by @__mask in @__val.
+ */
+#define REG_FIELD_GET64(__mask, __val)	((u64)FIELD_GET(__mask, __val))
+
 typedef struct {
 	u32 reg;
 } i915_reg_t;
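
To show the intended use of the two new 64-bit helpers, a sketch with an
invented register field (nothing below exists in the driver):

/* Invented field layout, for illustration only. */
#define EXAMPLE_ADDR_MASK	REG_GENMASK64(51, 12)

static u64 example_addr(u64 reg_val)
{
	/* mask and shift down, forced to u64 like REG_FIELD_GET() */
	return REG_FIELD_GET64(EXAMPLE_ADDR_MASK, reg_val);
}
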
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 5d94f86940f7739cd7877e11d31ba2602df8ad4e..582770360ad15bc9f47ebbd34f3ae5dba753f43b 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -44,6 +44,7 @@
 
 #include "i915_active.h"
 #include "i915_deps.h"
+#include "i915_driver.h"
 #include "i915_drv.h"
 #include "i915_trace.h"
 #include "intel_pm.h"
@@ -117,8 +118,10 @@ static void i915_fence_release(struct dma_fence *fence)
 		   rq->guc_prio != GUC_PRIO_FINI);
 
 	i915_request_free_capture_list(fetch_and_zero(&rq->capture_list));
-	if (i915_vma_snapshot_present(&rq->batch_snapshot))
-		i915_vma_snapshot_put_onstack(&rq->batch_snapshot);
+	if (rq->batch_res) {
+		i915_vma_resource_put(rq->batch_res);
+		rq->batch_res = NULL;
+	}
 
 	/*
 	 * The request is put onto a RCU freelist (i.e. the address
@@ -309,7 +312,7 @@ void i915_request_free_capture_list(struct i915_capture_list *capture)
 	while (capture) {
 		struct i915_capture_list *next = capture->next;
 
-		i915_vma_snapshot_put(capture->vma_snapshot);
+		i915_vma_resource_put(capture->vma_res);
 		kfree(capture);
 		capture = next;
 	}
@@ -855,7 +858,7 @@ static void __i915_request_ctor(void *arg)
 	i915_sw_fence_init(&rq->semaphore, semaphore_notify);
 
 	clear_capture_list(rq);
-	rq->batch_snapshot.present = false;
+	rq->batch_res = NULL;
 
 	init_llist_head(&rq->execute_cb);
 }
@@ -961,7 +964,7 @@ __i915_request_create(struct intel_context *ce, gfp_t gfp)
 	__rq_init_watchdog(rq);
 	assert_capture_list_is_null(rq);
 	GEM_BUG_ON(!llist_empty(&rq->execute_cb));
-	GEM_BUG_ON(i915_vma_snapshot_present(&rq->batch_snapshot));
+	GEM_BUG_ON(rq->batch_res);
 
 	/*
 	 * Reserve space in the ring buffer for all the commands required to
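
The request now holds a refcounted vma resource instead of an on-stack
snapshot, so teardown reduces to the put-and-clear idiom from the
i915_fence_release() hunk above, restated as a standalone sketch (the helper
name is made up):

static void example_drop_batch_res(struct i915_request *rq)
{
	if (rq->batch_res) {
		i915_vma_resource_put(rq->batch_res);
		rq->batch_res = NULL;
	}
}
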
diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
index 170ee78c2858efcc5bd1034738223769b85591d2..28b1f9db54875944d59d3a59b1e85c6ff16d4aa6 100644
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -40,7 +40,7 @@
 #include "i915_scheduler.h"
 #include "i915_selftest.h"
 #include "i915_sw_fence.h"
-#include "i915_vma_snapshot.h"
+#include "i915_vma_resource.h"
 
 #include <uapi/drm/i915_drm.h>
 
@@ -52,7 +52,7 @@ struct i915_request;
 
 #if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR)
 struct i915_capture_list {
-	struct i915_vma_snapshot *vma_snapshot;
+	struct i915_vma_resource *vma_res;
 	struct i915_capture_list *next;
 };
 
@@ -300,7 +300,7 @@ struct i915_request {
 	/** Batch buffer pointer for selftest internal use. */
 	I915_SELFTEST_DECLARE(struct i915_vma *batch);
 
-	struct i915_vma_snapshot batch_snapshot;
+	struct i915_vma_resource *batch_res;
 
 #if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR)
 	/**
diff --git a/drivers/gpu/drm/i915/i915_vgpu.c b/drivers/gpu/drm/i915/i915_vgpu.c
index 31a105bc179223b22ecd65420cb7c8b87f2fda1d..c97323973f9b74a717ea61b4aedfe8685b619dd5 100644
--- a/drivers/gpu/drm/i915/i915_vgpu.c
+++ b/drivers/gpu/drm/i915/i915_vgpu.c
@@ -197,7 +197,7 @@ static int vgt_balloon_space(struct i915_ggtt *ggtt,
 	drm_info(&dev_priv->drm,
 		 "balloon space: range [ 0x%lx - 0x%lx ] %lu KiB.\n",
 		 start, end, size / 1024);
-	ret = i915_gem_gtt_reserve(&ggtt->vm, node,
+	ret = i915_gem_gtt_reserve(&ggtt->vm, NULL, node,
 				   size, start, I915_COLOR_UNEVICTABLE,
 				   0);
 	if (!ret)
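
With this series i915_gem_gtt_reserve() takes the ww transaction context so
that eviction can participate in it; callers outside a transaction pass NULL,
as the ballooning hunk above does. A sketch of the updated call shape (the
wrapper is hypothetical):

/* Hypothetical wrapper, for illustration only. */
static int example_reserve(struct i915_ggtt *ggtt, struct drm_mm_node *node,
			   u64 size, u64 start)
{
	/* no ww transaction in this path, so pass NULL like the code above */
	return i915_gem_gtt_reserve(&ggtt->vm, NULL, node, size, start,
				    I915_COLOR_UNEVICTABLE, 0);
}
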
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index 68cf1d3922503e34beebfad7e21d00c66501f878..845cd88f8313c33643e65a708adeee429f863c07 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -38,6 +38,18 @@
 #include "i915_sw_fence_work.h"
 #include "i915_trace.h"
 #include "i915_vma.h"
+#include "i915_vma_resource.h"
+
+static inline void assert_vma_held_evict(const struct i915_vma *vma)
+{
+	/*
+	 * We may be forced to unbind when the vm is dead, to clean it up.
+	 * This is the only exception to the requirement of the object lock
+	 * being held.
+	 */
+	if (atomic_read(&vma->vm->open))
+		assert_object_held_shared(vma->obj);
+}
 
 static struct kmem_cache *slab_vmas;
 
@@ -285,7 +297,7 @@ struct i915_vma_work {
 	struct dma_fence_work base;
 	struct i915_address_space *vm;
 	struct i915_vm_pt_stash stash;
-	struct i915_vma *vma;
+	struct i915_vma_resource *vma_res;
 	struct drm_i915_gem_object *pinned;
 	struct i915_sw_dma_fence_cb cb;
 	enum i915_cache_level cache_level;
@@ -295,23 +307,24 @@ struct i915_vma_work {
 static void __vma_bind(struct dma_fence_work *work)
 {
 	struct i915_vma_work *vw = container_of(work, typeof(*vw), base);
-	struct i915_vma *vma = vw->vma;
+	struct i915_vma_resource *vma_res = vw->vma_res;
 
-	vma->ops->bind_vma(vw->vm, &vw->stash,
-			   vma, vw->cache_level, vw->flags);
+	vma_res->ops->bind_vma(vma_res->vm, &vw->stash,
+			       vma_res, vw->cache_level, vw->flags);
 }
 
 static void __vma_release(struct dma_fence_work *work)
 {
 	struct i915_vma_work *vw = container_of(work, typeof(*vw), base);
 
-	if (vw->pinned) {
-		__i915_gem_object_unpin_pages(vw->pinned);
+	if (vw->pinned)
 		i915_gem_object_put(vw->pinned);
-	}
 
 	i915_vm_free_pt_stash(vw->vm, &vw->stash);
 	i915_vm_put(vw->vm);
+	if (vw->vma_res)
+		i915_vma_resource_put(vw->vma_res);
 }
 
 static const struct dma_fence_work_ops bind_ops = {
@@ -375,12 +388,27 @@ static int i915_vma_verify_bind_complete(struct i915_vma *vma)
 #define i915_vma_verify_bind_complete(_vma) 0
 #endif
 
+I915_SELFTEST_EXPORT void
+i915_vma_resource_init_from_vma(struct i915_vma_resource *vma_res,
+				struct i915_vma *vma)
+{
+	struct drm_i915_gem_object *obj = vma->obj;
+
+	i915_vma_resource_init(vma_res, vma->vm, vma->pages, &vma->page_sizes,
+			       obj->mm.rsgt, i915_gem_object_is_readonly(obj),
+			       i915_gem_object_is_lmem(obj), obj->mm.region,
+			       vma->ops, vma->private, vma->node.start,
+			       vma->node.size, vma->size);
+}
+
 /**
 * i915_vma_bind - Sets up PTEs for a VMA in its corresponding address space.
  * @vma: VMA to map
  * @cache_level: mapping cache level
  * @flags: flags like global or local mapping
  * @work: preallocated worker for allocating and binding the PTE
+ * @vma_res: pointer to a preallocated vma resource. The resource is either
+ * consumed or freed.
  *
  * DMA addresses are taken from the scatter-gather table of this object (or of
  * this VMA in case of non-default GGTT views) and PTE entries set up.
@@ -389,10 +417,12 @@ static int i915_vma_verify_bind_complete(struct i915_vma *vma)
 int i915_vma_bind(struct i915_vma *vma,
 		  enum i915_cache_level cache_level,
 		  u32 flags,
-		  struct i915_vma_work *work)
+		  struct i915_vma_work *work,
+		  struct i915_vma_resource *vma_res)
 {
 	u32 bind_flags;
 	u32 vma_flags;
+	int ret;
 
 	lockdep_assert_held(&vma->vm->mutex);
 	GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
@@ -400,11 +430,15 @@ int i915_vma_bind(struct i915_vma *vma,
 
 	if (GEM_DEBUG_WARN_ON(range_overflows(vma->node.start,
 					      vma->node.size,
-					      vma->vm->total)))
+					      vma->vm->total))) {
+		i915_vma_resource_free(vma_res);
 		return -ENODEV;
+	}
 
-	if (GEM_DEBUG_WARN_ON(!flags))
+	if (GEM_DEBUG_WARN_ON(!flags)) {
+		i915_vma_resource_free(vma_res);
 		return -EINVAL;
+	}
 
 	bind_flags = flags;
 	bind_flags &= I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND;
@@ -413,16 +447,44 @@ int i915_vma_bind(struct i915_vma *vma,
 	vma_flags &= I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND;
 
 	bind_flags &= ~vma_flags;
-	if (bind_flags == 0)
+	if (bind_flags == 0) {
+		i915_vma_resource_free(vma_res);
 		return 0;
+	}
 
 	GEM_BUG_ON(!atomic_read(&vma->pages_count));
 
+	/* Wait for or await async unbinds touching our range */
+	if (work && bind_flags & vma->vm->bind_async_flags)
+		ret = i915_vma_resource_bind_dep_await(vma->vm,
+						       &work->base.chain,
+						       vma->node.start,
+						       vma->node.size,
+						       true,
+						       GFP_NOWAIT |
+						       __GFP_RETRY_MAYFAIL |
+						       __GFP_NOWARN);
+	else
+		ret = i915_vma_resource_bind_dep_sync(vma->vm, vma->node.start,
+						      vma->node.size, true);
+	if (ret) {
+		i915_vma_resource_free(vma_res);
+		return ret;
+	}
+
+	if (vma->resource || !vma_res) {
+		/* Rebinding with an additional I915_VMA_*_BIND */
+		GEM_WARN_ON(!vma_flags);
+		i915_vma_resource_free(vma_res);
+	} else {
+		i915_vma_resource_init_from_vma(vma_res, vma);
+		vma->resource = vma_res;
+	}
 	trace_i915_vma_bind(vma, bind_flags);
 	if (work && bind_flags & vma->vm->bind_async_flags) {
 		struct dma_fence *prev;
 
-		work->vma = vma;
+		work->vma_res = i915_vma_resource_get(vma->resource);
 		work->cache_level = cache_level;
 		work->flags = bind_flags;
 
@@ -445,17 +507,25 @@ int i915_vma_bind(struct i915_vma *vma,
 
 		work->base.dma.error = 0; /* enable the queue_work() */
 
-		__i915_gem_object_pin_pages(vma->obj);
-		work->pinned = i915_gem_object_get(vma->obj);
+		/*
+		 * If we don't have the refcounted pages list, keep a reference
+		 * on the object to avoid waiting for the async bind to
+		 * complete in the object destruction path.
+		 */
+		if (!work->vma_res->bi.pages_rsgt)
+			work->pinned = i915_gem_object_get(vma->obj);
 	} else {
 		if (vma->obj) {
-			int ret;
-
 			ret = i915_gem_object_wait_moving_fence(vma->obj, true);
-			if (ret)
+			if (ret) {
+				i915_vma_resource_free(vma->resource);
+				vma->resource = NULL;
+
 				return ret;
+			}
 		}
-		vma->ops->bind_vma(vma->vm, NULL, vma, cache_level, bind_flags);
+		vma->ops->bind_vma(vma->vm, NULL, vma->resource, cache_level,
+				   bind_flags);
 	}
 
 	if (vma->obj)
@@ -655,7 +725,8 @@ bool i915_gem_valid_gtt_space(struct i915_vma *vma, unsigned long color)
  * 0 on success, negative error code otherwise.
  */
 static int
-i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
+i915_vma_insert(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
+		u64 size, u64 alignment, u64 flags)
 {
 	unsigned long color;
 	u64 start, end;
@@ -707,7 +778,7 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
 		    range_overflows(offset, size, end))
 			return -EINVAL;
 
-		ret = i915_gem_gtt_reserve(vma->vm, &vma->node,
+		ret = i915_gem_gtt_reserve(vma->vm, ww, &vma->node,
 					   size, offset, color,
 					   flags);
 		if (ret)
@@ -746,7 +817,7 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
 				size = round_up(size, I915_GTT_PAGE_SIZE_2M);
 		}
 
-		ret = i915_gem_gtt_insert(vma->vm, &vma->node,
+		ret = i915_gem_gtt_insert(vma->vm, ww, &vma->node,
 					  size, alignment, color,
 					  start, end, flags);
 		if (ret)
@@ -780,9 +851,17 @@ i915_vma_detach(struct i915_vma *vma)
 static bool try_qad_pin(struct i915_vma *vma, unsigned int flags)
 {
 	unsigned int bound;
-	bool pinned = true;
 
 	bound = atomic_read(&vma->flags);
+
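+	/*
+	 * PIN_VALIDATE merely checks that the requested binding is already
+	 * in place; it does not take an extra pin reference.
+	 */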
+	if (flags & PIN_VALIDATE) {
+		flags &= I915_VMA_BIND_MASK;
+
+		return (flags & bound) == flags;
+	}
+
+	/* With the lock mandatory for unbind, we don't race here. */
+	flags &= I915_VMA_BIND_MASK;
 	do {
 		if (unlikely(flags & ~bound))
 			return false;
@@ -790,34 +869,10 @@ static bool try_qad_pin(struct i915_vma *vma, unsigned int flags)
 		if (unlikely(bound & (I915_VMA_OVERFLOW | I915_VMA_ERROR)))
 			return false;
 
-		if (!(bound & I915_VMA_PIN_MASK))
-			goto unpinned;
-
 		GEM_BUG_ON(((bound + 1) & I915_VMA_PIN_MASK) == 0);
 	} while (!atomic_try_cmpxchg(&vma->flags, &bound, bound + 1));
 
 	return true;
-
-unpinned:
-	/*
-	 * If pin_count==0, but we are bound, check under the lock to avoid
-	 * racing with a concurrent i915_vma_unbind().
-	 */
-	mutex_lock(&vma->vm->mutex);
-	do {
-		if (unlikely(bound & (I915_VMA_OVERFLOW | I915_VMA_ERROR))) {
-			pinned = false;
-			break;
-		}
-
-		if (unlikely(flags & ~bound)) {
-			pinned = false;
-			break;
-		}
-	} while (!atomic_try_cmpxchg(&vma->flags, &bound, bound + 1));
-	mutex_unlock(&vma->vm->mutex);
-
-	return pinned;
 }
 
 static struct scatterlist *
@@ -913,30 +968,39 @@ intel_rotate_pages(struct intel_rotation_info *rot_info,
 }
 
 static struct scatterlist *
-remap_pages(struct drm_i915_gem_object *obj,
-	    unsigned int offset, unsigned int alignment_pad,
-	    unsigned int width, unsigned int height,
-	    unsigned int src_stride, unsigned int dst_stride,
-	    struct sg_table *st, struct scatterlist *sg)
+add_padding_pages(unsigned int count,
+		  struct sg_table *st, struct scatterlist *sg)
+{
+	st->nents++;
+
+	/*
+	 * The DE ignores the PTEs for the padding tiles; the sg entry
+	 * here is just a convenience to indicate how many padding PTEs
+	 * to insert at this spot.
+	 */
+	sg_set_page(sg, NULL, count * I915_GTT_PAGE_SIZE, 0);
+	sg_dma_address(sg) = 0;
+	sg_dma_len(sg) = count * I915_GTT_PAGE_SIZE;
+	sg = sg_next(sg);
+
+	return sg;
+}
+
+static struct scatterlist *
+remap_tiled_color_plane_pages(struct drm_i915_gem_object *obj,
+			      unsigned int offset, unsigned int alignment_pad,
+			      unsigned int width, unsigned int height,
+			      unsigned int src_stride, unsigned int dst_stride,
+			      struct sg_table *st, struct scatterlist *sg,
+			      unsigned int *gtt_offset)
 {
 	unsigned int row;
 
 	if (!width || !height)
 		return sg;
 
-	if (alignment_pad) {
-		st->nents++;
-
-		/*
-		 * The DE ignores the PTEs for the padding tiles, the sg entry
-		 * here is just a convenience to indicate how many padding PTEs
-		 * to insert at this spot.
-		 */
-		sg_set_page(sg, NULL, alignment_pad * 4096, 0);
-		sg_dma_address(sg) = 0;
-		sg_dma_len(sg) = alignment_pad * 4096;
-		sg = sg_next(sg);
-	}
+	if (alignment_pad)
+		sg = add_padding_pages(alignment_pad, st, sg);
 
 	for (row = 0; row < height; row++) {
 		unsigned int left = width * I915_GTT_PAGE_SIZE;
@@ -973,18 +1037,98 @@ remap_pages(struct drm_i915_gem_object *obj,
 		if (!left)
 			continue;
 
+		sg = add_padding_pages(left >> PAGE_SHIFT, st, sg);
+	}
+
+	*gtt_offset += alignment_pad + dst_stride * height;
+
+	return sg;
+}
+
+static struct scatterlist *
+remap_contiguous_pages(struct drm_i915_gem_object *obj,
+		       unsigned int obj_offset,
+		       unsigned int count,
+		       struct sg_table *st, struct scatterlist *sg)
+{
+	struct scatterlist *iter;
+	unsigned int offset;
+
+	iter = i915_gem_object_get_sg_dma(obj, obj_offset, &offset);
+	GEM_BUG_ON(!iter);
+
+	do {
+		unsigned int len;
+
+		len = min(sg_dma_len(iter) - (offset << PAGE_SHIFT),
+			  count << PAGE_SHIFT);
+		sg_set_page(sg, NULL, len, 0);
+		sg_dma_address(sg) =
+			sg_dma_address(iter) + (offset << PAGE_SHIFT);
+		sg_dma_len(sg) = len;
+
 		st->nents++;
+		count -= len >> PAGE_SHIFT;
+		if (count == 0)
+			return sg;
 
-		/*
-		 * The DE ignores the PTEs for the padding tiles, the sg entry
-		 * here is just a conenience to indicate how many padding PTEs
-		 * to insert at this spot.
-		 */
-		sg_set_page(sg, NULL, left, 0);
-		sg_dma_address(sg) = 0;
-		sg_dma_len(sg) = left;
-		sg = sg_next(sg);
-	}
+		sg = __sg_next(sg);
+		iter = __sg_next(iter);
+		offset = 0;
+	} while (1);
+}
+
+static struct scatterlist *
+remap_linear_color_plane_pages(struct drm_i915_gem_object *obj,
+			       unsigned int obj_offset, unsigned int alignment_pad,
+			       unsigned int size,
+			       struct sg_table *st, struct scatterlist *sg,
+			       unsigned int *gtt_offset)
+{
+	if (!size)
+		return sg;
+
+	if (alignment_pad)
+		sg = add_padding_pages(alignment_pad, st, sg);
+
+	sg = remap_contiguous_pages(obj, obj_offset, size, st, sg);
+	sg = sg_next(sg);
+
+	*gtt_offset += alignment_pad + size;
+
+	return sg;
+}
+
+static struct scatterlist *
+remap_color_plane_pages(const struct intel_remapped_info *rem_info,
+			struct drm_i915_gem_object *obj,
+			int color_plane,
+			struct sg_table *st, struct scatterlist *sg,
+			unsigned int *gtt_offset)
+{
+	unsigned int alignment_pad = 0;
+
+	if (rem_info->plane_alignment)
+		alignment_pad = ALIGN(*gtt_offset, rem_info->plane_alignment) - *gtt_offset;
+
+	if (rem_info->plane[color_plane].linear)
+		sg = remap_linear_color_plane_pages(obj,
+						    rem_info->plane[color_plane].offset,
+						    alignment_pad,
+						    rem_info->plane[color_plane].size,
+						    st, sg,
+						    gtt_offset);
+
+	else
+		sg = remap_tiled_color_plane_pages(obj,
+						   rem_info->plane[color_plane].offset,
+						   alignment_pad,
+						   rem_info->plane[color_plane].width,
+						   rem_info->plane[color_plane].height,
+						   rem_info->plane[color_plane].src_stride,
+						   rem_info->plane[color_plane].dst_stride,
+						   st, sg,
+						   gtt_offset);
 
 	return sg;
 }
@@ -1013,21 +1157,8 @@ intel_remap_pages(struct intel_remapped_info *rem_info,
 	st->nents = 0;
 	sg = st->sgl;
 
-	for (i = 0 ; i < ARRAY_SIZE(rem_info->plane); i++) {
-		unsigned int alignment_pad = 0;
-
-		if (rem_info->plane_alignment)
-			alignment_pad = ALIGN(gtt_offset, rem_info->plane_alignment) - gtt_offset;
-
-		sg = remap_pages(obj,
-				 rem_info->plane[i].offset, alignment_pad,
-				 rem_info->plane[i].width, rem_info->plane[i].height,
-				 rem_info->plane[i].src_stride, rem_info->plane[i].dst_stride,
-				 st, sg);
-
-		gtt_offset += alignment_pad +
-			      rem_info->plane[i].dst_stride * rem_info->plane[i].height;
-	}
+	for (i = 0 ; i < ARRAY_SIZE(rem_info->plane); i++)
+		sg = remap_color_plane_pages(rem_info, obj, i, st, sg, &gtt_offset);
 
 	i915_sg_trim(st);
 
@@ -1049,9 +1180,8 @@ intel_partial_pages(const struct i915_ggtt_view *view,
 		    struct drm_i915_gem_object *obj)
 {
 	struct sg_table *st;
-	struct scatterlist *sg, *iter;
+	struct scatterlist *sg;
 	unsigned int count = view->partial.size;
-	unsigned int offset;
 	int ret = -ENOMEM;
 
 	st = kmalloc(sizeof(*st), GFP_KERNEL);
@@ -1062,34 +1192,14 @@ intel_partial_pages(const struct i915_ggtt_view *view,
 	if (ret)
 		goto err_sg_alloc;
 
-	iter = i915_gem_object_get_sg_dma(obj, view->partial.offset, &offset);
-	GEM_BUG_ON(!iter);
-
-	sg = st->sgl;
 	st->nents = 0;
-	do {
-		unsigned int len;
-
-		len = min(sg_dma_len(iter) - (offset << PAGE_SHIFT),
-			  count << PAGE_SHIFT);
-		sg_set_page(sg, NULL, len, 0);
-		sg_dma_address(sg) =
-			sg_dma_address(iter) + (offset << PAGE_SHIFT);
-		sg_dma_len(sg) = len;
 
-		st->nents++;
-		count -= len >> PAGE_SHIFT;
-		if (count == 0) {
-			sg_mark_end(sg);
-			i915_sg_trim(st); /* Drop any unused tail entries. */
+	sg = remap_contiguous_pages(obj, view->partial.offset, count, st, st->sgl);
 
-			return st;
-		}
+	sg_mark_end(sg);
+	i915_sg_trim(st); /* Drop any unused tail entries. */
 
-		sg = __sg_next(sg);
-		iter = __sg_next(iter);
-		offset = 0;
-	} while (1);
+	return st;
 
 err_sg_alloc:
 	kfree(st);
@@ -1101,7 +1211,6 @@ static int
 __i915_vma_get_pages(struct i915_vma *vma)
 {
 	struct sg_table *pages;
-	int ret;
 
 	/*
 	 * The vma->pages are only valid within the lifespan of the borrowed
@@ -1134,18 +1243,16 @@ __i915_vma_get_pages(struct i915_vma *vma)
 		break;
 	}
 
-	ret = 0;
 	if (IS_ERR(pages)) {
-		ret = PTR_ERR(pages);
-		pages = NULL;
 		drm_err(&vma->vm->i915->drm,
-			"Failed to get pages for VMA view type %u (%d)!\n",
-			vma->ggtt_view.type, ret);
+			"Failed to get pages for VMA view type %u (%ld)!\n",
+			vma->ggtt_view.type, PTR_ERR(pages));
+		return PTR_ERR(pages);
 	}
 
 	vma->pages = pages;
 
-	return ret;
+	return 0;
 }
 
 I915_SELFTEST_EXPORT int i915_vma_get_pages(struct i915_vma *vma)
@@ -1177,25 +1284,14 @@ I915_SELFTEST_EXPORT int i915_vma_get_pages(struct i915_vma *vma)
 static void __vma_put_pages(struct i915_vma *vma, unsigned int count)
 {
 	/* We allocate under vma_get_pages, so beware the shrinker */
-	struct sg_table *pages = READ_ONCE(vma->pages);
-
 	GEM_BUG_ON(atomic_read(&vma->pages_count) < count);
 
 	if (atomic_sub_return(count, &vma->pages_count) == 0) {
-		/*
-		 * The atomic_sub_return is a read barrier for the READ_ONCE of
-		 * vma->pages above.
-		 *
-		 * READ_ONCE is safe because this is either called from the same
-		 * function (i915_vma_pin_ww), or guarded by vma->vm->mutex.
-		 *
-		 * TODO: We're leaving vma->pages dangling, until vma->obj->resv
-		 * lock is required.
-		 */
-		if (pages != vma->obj->mm.pages) {
-			sg_free_table(pages);
-			kfree(pages);
+		if (vma->pages != vma->obj->mm.pages) {
+			sg_free_table(vma->pages);
+			kfree(vma->pages);
 		}
+		vma->pages = NULL;
 
 		i915_gem_object_unpin_pages(vma->obj);
 	}
@@ -1228,6 +1324,7 @@ int i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
 {
 	struct i915_vma_work *work = NULL;
 	struct dma_fence *moving = NULL;
+	struct i915_vma_resource *vma_res = NULL;
 	intel_wakeref_t wakeref = 0;
 	unsigned int bound;
 	int err;
@@ -1241,7 +1338,7 @@ int i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
 	GEM_BUG_ON(!(flags & (PIN_USER | PIN_GLOBAL)));
 
 	/* First try and grab the pin without rebinding the vma */
-	if (try_qad_pin(vma, flags & I915_VMA_BIND_MASK))
+	if (try_qad_pin(vma, flags))
 		return 0;
 
 	err = i915_vma_get_pages(vma);
@@ -1282,6 +1379,12 @@ int i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
 		}
 	}
 
+	vma_res = i915_vma_resource_alloc();
+	if (IS_ERR(vma_res)) {
+		err = PTR_ERR(vma_res);
+		goto err_fence;
+	}
+
 	/*
 	 * Differentiate between user/kernel vma inside the aliasing-ppgtt.
 	 *
@@ -1302,7 +1405,7 @@ int i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
 	err = mutex_lock_interruptible_nested(&vma->vm->mutex,
 					      !(flags & PIN_GLOBAL));
 	if (err)
-		goto err_fence;
+		goto err_vma_res;
 
 	/* No more allocations allowed now we hold vm->mutex */
 
@@ -1323,7 +1426,8 @@ int i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
 	}
 
 	if (unlikely(!(flags & ~bound & I915_VMA_BIND_MASK))) {
-		__i915_vma_pin(vma);
+		if (!(flags & PIN_VALIDATE))
+			__i915_vma_pin(vma);
 		goto err_unlock;
 	}
 
@@ -1332,7 +1436,7 @@ int i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
 		goto err_unlock;
 
 	if (!(bound & I915_VMA_BIND_MASK)) {
-		err = i915_vma_insert(vma, size, alignment, flags);
+		err = i915_vma_insert(vma, ww, size, alignment, flags);
 		if (err)
 			goto err_active;
 
@@ -1343,7 +1447,8 @@ int i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
 	GEM_BUG_ON(!vma->pages);
 	err = i915_vma_bind(vma,
 			    vma->obj->cache_level,
-			    flags, work);
+			    flags, work, vma_res);
+	vma_res = NULL;
 	if (err)
 		goto err_remove;
 
@@ -1352,8 +1457,10 @@ int i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
 	atomic_add(I915_VMA_PAGES_ACTIVE, &vma->pages_count);
 	list_move_tail(&vma->vm_link, &vma->vm->bound_list);
 
-	__i915_vma_pin(vma);
-	GEM_BUG_ON(!i915_vma_is_pinned(vma));
+	if (!(flags & PIN_VALIDATE)) {
+		__i915_vma_pin(vma);
+		GEM_BUG_ON(!i915_vma_is_pinned(vma));
+	}
 	GEM_BUG_ON(!i915_vma_is_bound(vma, flags));
 	GEM_BUG_ON(i915_vma_misplaced(vma, size, alignment, flags));
 
@@ -1366,6 +1473,8 @@ int i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
 	i915_active_release(&vma->active);
 err_unlock:
 	mutex_unlock(&vma->vm->mutex);
+err_vma_res:
+	i915_vma_resource_free(vma_res);
 err_fence:
 	if (work)
 		dma_fence_work_commit_imm(&work->base);
@@ -1412,7 +1521,12 @@ static int __i915_ggtt_pin(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
 		/* Unlike i915_vma_pin, we don't take no for an answer! */
 		flush_idle_contexts(vm->gt);
 		if (mutex_lock_interruptible(&vm->mutex) == 0) {
-			i915_gem_evict_vm(vm);
+			/*
+			 * We pass NULL ww here, as we don't want to unbind
+			 * locked objects when called from execbuf, once
+			 * pinning is removed. This would probably regress
+			 * badly.
+			 */
+			i915_gem_evict_vm(vm, NULL);
 			mutex_unlock(&vm->mutex);
 		}
 	} while (1);
@@ -1516,6 +1630,7 @@ void i915_vma_release(struct kref *ref)
 	i915_vm_put(vma->vm);
 
 	i915_active_fini(&vma->active);
+	GEM_WARN_ON(vma->resource);
 	i915_vma_free(vma);
 }
 
@@ -1548,8 +1663,16 @@ void i915_vma_parked(struct intel_gt *gt)
 		struct drm_i915_gem_object *obj = vma->obj;
 		struct i915_address_space *vm = vma->vm;
 
-		INIT_LIST_HEAD(&vma->closed_link);
-		__i915_vma_put(vma);
+		if (i915_gem_object_trylock(obj, NULL)) {
+			INIT_LIST_HEAD(&vma->closed_link);
+			__i915_vma_put(vma);
+			i915_gem_object_unlock(obj);
+		} else {
+			/* back you go.. */
+			spin_lock_irq(&gt->closed_lock);
+			list_add(&vma->closed_link, &gt->closed_vma);
+			spin_unlock_irq(&gt->closed_lock);
+		}
 
 		i915_gem_object_put(obj);
 		i915_vm_close(vm);
@@ -1600,8 +1723,6 @@ static int __i915_vma_move_to_active(struct i915_vma *vma, struct i915_request *
 {
 	int err;
 
-	GEM_BUG_ON(!i915_vma_is_pinned(vma));
-
 	/* Wait for the vma to be bound before we start! */
 	err = __i915_request_await_bind(rq, vma);
 	if (err)
@@ -1620,6 +1741,8 @@ int _i915_vma_move_to_active(struct i915_vma *vma,
 
 	assert_object_held(obj);
 
+	GEM_BUG_ON(!vma->pages);
+
 	err = __i915_vma_move_to_active(vma, rq);
 	if (unlikely(err))
 		return err;
@@ -1662,9 +1785,13 @@ int _i915_vma_move_to_active(struct i915_vma *vma,
 	return 0;
 }
 
-void __i915_vma_evict(struct i915_vma *vma)
+struct dma_fence *__i915_vma_evict(struct i915_vma *vma, bool async)
 {
+	struct i915_vma_resource *vma_res = vma->resource;
+	struct dma_fence *unbind_fence;
+
 	GEM_BUG_ON(i915_vma_is_pinned(vma));
+	assert_vma_held_evict(vma);
 
 	if (i915_vma_is_map_and_fenceable(vma)) {
 		/* Force a pagefault for domain tracking on next user access */
@@ -1694,15 +1821,36 @@ void __i915_vma_evict(struct i915_vma *vma)
 	GEM_BUG_ON(vma->fence);
 	GEM_BUG_ON(i915_vma_has_userfault(vma));
 
-	if (likely(atomic_read(&vma->vm->open))) {
-		trace_i915_vma_unbind(vma);
-		vma->ops->unbind_vma(vma->vm, vma);
-	}
+	/* Object backend must be async capable. */
+	GEM_WARN_ON(async && !vma->resource->bi.pages_rsgt);
+
+	/* If vm is not open, unbind is a nop. */
+	vma_res->needs_wakeref = i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND) &&
+		atomic_read(&vma->vm->open);
+	trace_i915_vma_unbind(vma);
+
+	unbind_fence = i915_vma_resource_unbind(vma_res);
+	vma->resource = NULL;
+
 	atomic_and(~(I915_VMA_BIND_MASK | I915_VMA_ERROR | I915_VMA_GGTT_WRITE),
 		   &vma->flags);
 
 	i915_vma_detach(vma);
+
+	if (!async && unbind_fence) {
+		dma_fence_wait(unbind_fence, false);
+		dma_fence_put(unbind_fence);
+		unbind_fence = NULL;
+	}
+
+	/*
+	 * Binding itself may not have completed until the unbind fence signals,
+	 * so don't drop the pages until that happens, unless the resource is
+	 * async_capable.
+	 */
+
 	vma_unbind_pages(vma);
+	return unbind_fence;
 }
 
 int __i915_vma_unbind(struct i915_vma *vma)
@@ -1710,6 +1858,7 @@ int __i915_vma_unbind(struct i915_vma *vma)
 	int ret;
 
 	lockdep_assert_held(&vma->vm->mutex);
+	assert_vma_held_evict(vma);
 
 	if (!drm_mm_node_allocated(&vma->node))
 		return 0;
@@ -1729,18 +1878,55 @@ int __i915_vma_unbind(struct i915_vma *vma)
 		return ret;
 
 	GEM_BUG_ON(i915_vma_is_active(vma));
-	__i915_vma_evict(vma);
+	__i915_vma_evict(vma, false);
 
 	drm_mm_remove_node(&vma->node); /* pairs with i915_vma_release() */
 	return 0;
 }
 
+static struct dma_fence *__i915_vma_unbind_async(struct i915_vma *vma)
+{
+	struct dma_fence *fence;
+
+	lockdep_assert_held(&vma->vm->mutex);
+
+	if (!drm_mm_node_allocated(&vma->node))
+		return NULL;
+
+	if (i915_vma_is_pinned(vma) ||
+	    &vma->obj->mm.rsgt->table != vma->resource->bi.pages)
+		return ERR_PTR(-EAGAIN);
+
+	/*
+	 * We probably need to replace this with awaiting the fences of the
+	 * object's dma_resv when the vma active goes away. When doing that
+	 * we need to be careful to not add the vma_resource unbind fence
+	 * immediately to the object's dma_resv, because then unbinding
+	 * the next vma from the object, in case there are many, will
+	 * actually await the unbinding of the previous vmas, which is
+	 * undesirable.
+	 */
+	if (i915_sw_fence_await_active(&vma->resource->chain, &vma->active,
+				       I915_ACTIVE_AWAIT_EXCL |
+				       I915_ACTIVE_AWAIT_ACTIVE) < 0) {
+		return ERR_PTR(-EBUSY);
+	}
+
+	fence = __i915_vma_evict(vma, true);
+
+	drm_mm_remove_node(&vma->node); /* pairs with i915_vma_release() */
+
+	return fence;
+}
+
 int i915_vma_unbind(struct i915_vma *vma)
 {
 	struct i915_address_space *vm = vma->vm;
 	intel_wakeref_t wakeref = 0;
 	int err;
 
+	assert_object_held_shared(vma->obj);
+
 	/* Optimistic wait before taking the mutex */
 	err = i915_vma_sync(vma);
 	if (err)
@@ -1771,6 +1957,79 @@ int i915_vma_unbind(struct i915_vma *vma)
 	return err;
 }
 
+int i915_vma_unbind_async(struct i915_vma *vma, bool trylock_vm)
+{
+	struct drm_i915_gem_object *obj = vma->obj;
+	struct i915_address_space *vm = vma->vm;
+	intel_wakeref_t wakeref = 0;
+	struct dma_fence *fence;
+	int err;
+
+	/*
+	 * We need the dma-resv lock since we add the
+	 * unbind fence to the dma-resv object.
+	 */
+	assert_object_held(obj);
+
+	if (!drm_mm_node_allocated(&vma->node))
+		return 0;
+
+	if (i915_vma_is_pinned(vma)) {
+		vma_print_allocator(vma, "is pinned");
+		return -EAGAIN;
+	}
+
+	if (!obj->mm.rsgt)
+		return -EBUSY;
+
+	err = dma_resv_reserve_shared(obj->base.resv, 1);
+	if (err)
+		return -EBUSY;
+
+	/*
+	 * It would be great if we could grab this wakeref from the
+	 * async unbind work if needed, but we can't because it uses
+	 * kmalloc and it's in the dma-fence signalling critical path.
+	 */
+	if (i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND))
+		wakeref = intel_runtime_pm_get(&vm->i915->runtime_pm);
+
+	if (trylock_vm && !mutex_trylock(&vm->mutex)) {
+		err = -EBUSY;
+		goto out_rpm;
+	} else if (!trylock_vm) {
+		err = mutex_lock_interruptible_nested(&vm->mutex, !wakeref);
+		if (err)
+			goto out_rpm;
+	}
+
+	fence = __i915_vma_unbind_async(vma);
+	mutex_unlock(&vm->mutex);
+	if (IS_ERR_OR_NULL(fence)) {
+		err = PTR_ERR_OR_ZERO(fence);
+		goto out_rpm;
+	}
+
+	dma_resv_add_shared_fence(obj->base.resv, fence);
+	dma_fence_put(fence);
+
+out_rpm:
+	if (wakeref)
+		intel_runtime_pm_put(&vm->i915->runtime_pm, wakeref);
+	return err;
+}
+
+int i915_vma_unbind_unlocked(struct i915_vma *vma)
+{
+	int err;
+
+	i915_gem_object_lock(vma->obj, NULL);
+	err = i915_vma_unbind(vma);
+	i915_gem_object_unlock(vma->obj);
+
+	return err;
+}
+
 struct i915_vma *i915_vma_make_unshrinkable(struct i915_vma *vma)
 {
 	i915_gem_object_make_unshrinkable(vma->obj);
diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
index 32719431b3df22ca039d04960ecce462d3e0ee42..011af044ad4fc8aba9593d26b5c070ba815c5b73 100644
--- a/drivers/gpu/drm/i915/i915_vma.h
+++ b/drivers/gpu/drm/i915/i915_vma.h
@@ -37,6 +37,7 @@
 
 #include "i915_active.h"
 #include "i915_request.h"
+#include "i915_vma_resource.h"
 #include "i915_vma_types.h"
 
 struct i915_vma *
@@ -204,16 +205,19 @@ struct i915_vma_work *i915_vma_work(void);
 int i915_vma_bind(struct i915_vma *vma,
 		  enum i915_cache_level cache_level,
 		  u32 flags,
-		  struct i915_vma_work *work);
+		  struct i915_vma_work *work,
+		  struct i915_vma_resource *vma_res);
 
 bool i915_gem_valid_gtt_space(struct i915_vma *vma, unsigned long color);
 bool i915_vma_misplaced(const struct i915_vma *vma,
 			u64 size, u64 alignment, u64 flags);
 void __i915_vma_set_map_and_fenceable(struct i915_vma *vma);
 void i915_vma_revoke_mmap(struct i915_vma *vma);
-void __i915_vma_evict(struct i915_vma *vma);
+struct dma_fence *__i915_vma_evict(struct i915_vma *vma, bool async);
 int __i915_vma_unbind(struct i915_vma *vma);
 int __must_check i915_vma_unbind(struct i915_vma *vma);
+int __must_check i915_vma_unbind_async(struct i915_vma *vma, bool trylock_vm);
+int __must_check i915_vma_unbind_unlocked(struct i915_vma *vma);
 void i915_vma_unlink_ctx(struct i915_vma *vma);
 void i915_vma_close(struct i915_vma *vma);
 void i915_vma_reopen(struct i915_vma *vma);
@@ -337,12 +341,6 @@ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma);
  */
 void i915_vma_unpin_iomap(struct i915_vma *vma);
 
-static inline struct page *i915_vma_first_page(struct i915_vma *vma)
-{
-	GEM_BUG_ON(!vma->pages);
-	return sg_page(vma->pages->sgl);
-}
-
 /**
  * i915_vma_pin_fence - pin fencing state
  * @vma: vma to pin fencing for
@@ -428,6 +426,26 @@ static inline int i915_vma_sync(struct i915_vma *vma)
 	return i915_active_wait(&vma->active);
 }
 
+/**
+ * i915_vma_get_current_resource - Get the current resource of the vma
+ * @vma: The vma to get the current resource from.
+ *
+ * It's illegal to call this function if the vma is not bound.
+ *
+ * Return: A refcounted pointer to the current vma resource
+ * of the vma, assuming the vma is bound.
+ */
+static inline struct i915_vma_resource *
+i915_vma_get_current_resource(struct i915_vma *vma)
+{
+	return i915_vma_resource_get(vma->resource);
+}
+
+#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
+void i915_vma_resource_init_from_vma(struct i915_vma_resource *vma_res,
+				     struct i915_vma *vma);
+#endif
+
 void i915_vma_module_exit(void);
 int i915_vma_module_init(void);
 
diff --git a/drivers/gpu/drm/i915/i915_vma_resource.c b/drivers/gpu/drm/i915/i915_vma_resource.c
new file mode 100644
index 0000000000000000000000000000000000000000..57ae92ba8af1ba4a5318559e280dacd6e48c36b6
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_vma_resource.c
@@ -0,0 +1,418 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#include <linux/interval_tree_generic.h>
+#include <linux/sched/mm.h>
+
+#include "i915_sw_fence.h"
+#include "i915_vma_resource.h"
+#include "i915_drv.h"
+#include "intel_memory_region.h"
+
+#include "gt/intel_gtt.h"
+
+static struct kmem_cache *slab_vma_resources;
+
+/**
+ * DOC: Pending unbind interval tree
+ *
+ * We use a per-vm interval tree to keep track of vma_resources
+ * scheduled for unbind but not yet unbound. The tree is protected by
+ * the vm mutex, and nodes are removed just after the unbind fence signals.
+ * The removal takes the vm mutex from a kernel thread, which we need to
+ * keep in mind so that we don't grab the mutex and try to wait for all
+ * pending unbinds to complete, because that will temporarily block many
+ * of the workqueue threads, and people will get angry.
+ *
+ * We should consider using a single ordered fence per VM instead, but that
+ * requires ordering the unbinds and might introduce unnecessary waiting
+ * for unrelated unbinds. The amount of code would probably be roughly the
+ * same, given the simplicity of the interval tree interface.
+ *
+ * Another drawback of the interval tree is that the complexity of fence
+ * insertion and removal grows as O(log(pending_unbinds)) instead of
+ * O(1) for a single fence without an interval tree.
+ */
+#define VMA_RES_START(_node) ((_node)->start)
+#define VMA_RES_LAST(_node) ((_node)->start + (_node)->node_size - 1)
+INTERVAL_TREE_DEFINE(struct i915_vma_resource, rb,
+		     u64, __subtree_last,
+		     VMA_RES_START, VMA_RES_LAST, static, vma_res_itree);
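+
+/*
+ * Lookups follow the usual interval_tree_generic iteration idiom, as the
+ * bind_dep functions below do, e.g.:
+ *
+ *	for (node = vma_res_itree_iter_first(&vm->pending_unbind, first, last);
+ *	     node;
+ *	     node = vma_res_itree_iter_next(node, first, last))
+ *		...;
+ */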
+
+/* Callbacks for the unbind dma-fence. */
+
+/**
+ * i915_vma_resource_alloc - Allocate a vma resource
+ *
+ * Return: A pointer to a cleared struct i915_vma_resource or
+ * a -ENOMEM error pointer if allocation fails.
+ */
+struct i915_vma_resource *i915_vma_resource_alloc(void)
+{
+	struct i915_vma_resource *vma_res =
+		kmem_cache_zalloc(slab_vma_resources, GFP_KERNEL);
+
+	return vma_res ? vma_res : ERR_PTR(-ENOMEM);
+}
+
+/**
+ * i915_vma_resource_free - Free a vma resource
+ * @vma_res: The vma resource to free.
+ */
+void i915_vma_resource_free(struct i915_vma_resource *vma_res)
+{
+	if (vma_res)
+		kmem_cache_free(slab_vma_resources, vma_res);
+}
+
+static const char *get_driver_name(struct dma_fence *fence)
+{
+	return "vma unbind fence";
+}
+
+static const char *get_timeline_name(struct dma_fence *fence)
+{
+	return "unbound";
+}
+
+static void unbind_fence_free_rcu(struct rcu_head *head)
+{
+	struct i915_vma_resource *vma_res =
+		container_of(head, typeof(*vma_res), unbind_fence.rcu);
+
+	i915_vma_resource_free(vma_res);
+}
+
+static void unbind_fence_release(struct dma_fence *fence)
+{
+	struct i915_vma_resource *vma_res =
+		container_of(fence, typeof(*vma_res), unbind_fence);
+
+	i915_sw_fence_fini(&vma_res->chain);
+
+	call_rcu(&fence->rcu, unbind_fence_free_rcu);
+}
+
+static const struct dma_fence_ops unbind_fence_ops = {
+	.get_driver_name = get_driver_name,
+	.get_timeline_name = get_timeline_name,
+	.release = unbind_fence_release,
+};
+
+static void __i915_vma_resource_unhold(struct i915_vma_resource *vma_res)
+{
+	struct i915_address_space *vm;
+
+	if (!refcount_dec_and_test(&vma_res->hold_count))
+		return;
+
+	dma_fence_signal(&vma_res->unbind_fence);
+
+	vm = vma_res->vm;
+	if (vma_res->wakeref)
+		intel_runtime_pm_put(&vm->i915->runtime_pm, vma_res->wakeref);
+
+	vma_res->vm = NULL;
+	if (!RB_EMPTY_NODE(&vma_res->rb)) {
+		mutex_lock(&vm->mutex);
+		vma_res_itree_remove(vma_res, &vm->pending_unbind);
+		mutex_unlock(&vm->mutex);
+	}
+
+	if (vma_res->bi.pages_rsgt)
+		i915_refct_sgt_put(vma_res->bi.pages_rsgt);
+}
+
+/**
+ * i915_vma_resource_unhold - Unhold the signaling of the vma resource unbind
+ * fence.
+ * @vma_res: The vma resource.
+ * @lockdep_cookie: The lockdep cookie returned from i915_vma_resource_hold.
+ *
+ * The function may leave a dma_fence critical section.
+ */
+void i915_vma_resource_unhold(struct i915_vma_resource *vma_res,
+			      bool lockdep_cookie)
+{
+	dma_fence_end_signalling(lockdep_cookie);
+
+	if (IS_ENABLED(CONFIG_PROVE_LOCKING)) {
+		unsigned long irq_flags;
+
+		/* Inefficient open-coded might_lock_irqsave() */
+		spin_lock_irqsave(&vma_res->lock, irq_flags);
+		spin_unlock_irqrestore(&vma_res->lock, irq_flags);
+	}
+
+	__i915_vma_resource_unhold(vma_res);
+}
+
+/**
+ * i915_vma_resource_hold - Hold the signaling of the vma resource unbind fence.
+ * @vma_res: The vma resource.
+ * @lockdep_cookie: Pointer to a bool serving as a lockdep cookie that should
+ * be given as an argument to the pairing i915_vma_resource_unhold.
+ *
+ * If returning true, the function enters a dma_fence signalling critical
+ * section if not in one already.
+ *
+ * Return: true if holding successful, false if not.
+ */
+bool i915_vma_resource_hold(struct i915_vma_resource *vma_res,
+			    bool *lockdep_cookie)
+{
+	bool held = refcount_inc_not_zero(&vma_res->hold_count);
+
+	if (held)
+		*lockdep_cookie = dma_fence_begin_signalling();
+
+	return held;
+}
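+
+/*
+ * A minimal usage sketch (hypothetical caller): pair hold/unhold around
+ * access to memory that would otherwise be released once the unbind
+ * fence signals:
+ *
+ *	bool lockdep_cookie;
+ *
+ *	if (i915_vma_resource_hold(vma_res, &lockdep_cookie)) {
+ *		(access the bound range here)
+ *		i915_vma_resource_unhold(vma_res, lockdep_cookie);
+ *	}
+ */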
+
+static void i915_vma_resource_unbind_work(struct work_struct *work)
+{
+	struct i915_vma_resource *vma_res =
+		container_of(work, typeof(*vma_res), work);
+	struct i915_address_space *vm = vma_res->vm;
+	bool lockdep_cookie;
+
+	lockdep_cookie = dma_fence_begin_signalling();
+	if (likely(atomic_read(&vm->open)))
+		vma_res->ops->unbind_vma(vm, vma_res);
+
+	dma_fence_end_signalling(lockdep_cookie);
+	__i915_vma_resource_unhold(vma_res);
+	i915_vma_resource_put(vma_res);
+}
+
+static int
+i915_vma_resource_fence_notify(struct i915_sw_fence *fence,
+			       enum i915_sw_fence_notify state)
+{
+	struct i915_vma_resource *vma_res =
+		container_of(fence, typeof(*vma_res), chain);
+	struct dma_fence *unbind_fence =
+		&vma_res->unbind_fence;
+
+	switch (state) {
+	case FENCE_COMPLETE:
+		dma_fence_get(unbind_fence);
+		if (vma_res->immediate_unbind) {
+			i915_vma_resource_unbind_work(&vma_res->work);
+		} else {
+			INIT_WORK(&vma_res->work, i915_vma_resource_unbind_work);
+			queue_work(system_unbound_wq, &vma_res->work);
+		}
+		break;
+	case FENCE_FREE:
+		i915_vma_resource_put(vma_res);
+		break;
+	}
+
+	return NOTIFY_DONE;
+}
+
+/**
+ * i915_vma_resource_unbind - Unbind a vma resource
+ * @vma_res: The vma resource to unbind.
+ *
+ * At this point this function does little more than publish a fence that
+ * signals immediately unless signaling is held back.
+ *
+ * Return: A refcounted pointer to a dma-fence that signals when unbinding is
+ * complete.
+ */
+struct dma_fence *i915_vma_resource_unbind(struct i915_vma_resource *vma_res)
+{
+	struct i915_address_space *vm = vma_res->vm;
+
+	/* Reference for the sw fence */
+	i915_vma_resource_get(vma_res);
+
+	/* Caller must already have a wakeref in this case. */
+	if (vma_res->needs_wakeref)
+		vma_res->wakeref = intel_runtime_pm_get_if_in_use(&vm->i915->runtime_pm);
+
+	if (atomic_read(&vma_res->chain.pending) <= 1) {
+		RB_CLEAR_NODE(&vma_res->rb);
+		vma_res->immediate_unbind = 1;
+	} else {
+		vma_res_itree_insert(vma_res, &vma_res->vm->pending_unbind);
+	}
+
+	i915_sw_fence_commit(&vma_res->chain);
+
+	return &vma_res->unbind_fence;
+}
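+
+/*
+ * A synchronous caller holding the vm mutex can simply wait on the
+ * returned fence and drop its reference, as __i915_vma_evict() does for
+ * non-async unbinds:
+ *
+ *	fence = i915_vma_resource_unbind(vma_res);
+ *	dma_fence_wait(fence, false);
+ *	dma_fence_put(fence);
+ */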
+
+/**
+ * __i915_vma_resource_init - Initialize a vma resource.
+ * @vma_res: The vma resource to initialize
+ *
+ * Initializes the private members of a vma resource.
+ */
+void __i915_vma_resource_init(struct i915_vma_resource *vma_res)
+{
+	spin_lock_init(&vma_res->lock);
+	dma_fence_init(&vma_res->unbind_fence, &unbind_fence_ops,
+		       &vma_res->lock, 0, 0);
+	refcount_set(&vma_res->hold_count, 1);
+	i915_sw_fence_init(&vma_res->chain, i915_vma_resource_fence_notify);
+}
+
+static void
+i915_vma_resource_color_adjust_range(struct i915_address_space *vm,
+				     u64 *start,
+				     u64 *end)
+{
+	if (i915_vm_has_cache_coloring(vm)) {
+		if (*start)
+			*start -= I915_GTT_PAGE_SIZE;
+		*end += I915_GTT_PAGE_SIZE;
+	}
+}
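+
+/*
+ * Worked example, assuming I915_GTT_PAGE_SIZE == SZ_4K: with cache
+ * coloring enabled, a range with start 0x10000 and last byte 0x1ffff is
+ * widened to start 0xf000 and last 0x20fff, so that unbinds of the
+ * neighbouring guard pages are waited upon as well.
+ */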
+
+/**
+ * i915_vma_resource_bind_dep_sync - Wait for / sync all unbinds touching a
+ * certain vm range.
+ * @vm: The vm to look at.
+ * @offset: The range start.
+ * @size: The range size.
+ * @intr: Whether to wait interruptibly.
+ *
+ * The function needs to be called with the vm lock held.
+ *
+ * Return: Zero on success, -ERESTARTSYS if interrupted and @intr==true
+ */
+int i915_vma_resource_bind_dep_sync(struct i915_address_space *vm,
+				    u64 offset,
+				    u64 size,
+				    bool intr)
+{
+	struct i915_vma_resource *node;
+	u64 last = offset + size - 1;
+
+	lockdep_assert_held(&vm->mutex);
+	might_sleep();
+
+	i915_vma_resource_color_adjust_range(vm, &offset, &last);
+	node = vma_res_itree_iter_first(&vm->pending_unbind, offset, last);
+	while (node) {
+		int ret = dma_fence_wait(&node->unbind_fence, intr);
+
+		if (ret)
+			return ret;
+
+		node = vma_res_itree_iter_next(node, offset, last);
+	}
+
+	return 0;
+}
+
+/**
+ * i915_vma_resource_bind_dep_sync_all - Wait for / sync all unbinds of a vm,
+ * releasing the vm lock while waiting.
+ * @vm: The vm to look at.
+ *
+ * The function may not be called with the vm lock held.
+ * Typically this is called at vm destruction to finish any pending
+ * unbind operations. The vm mutex is released while waiting to avoid
+ * stalling kernel workqueues trying to grab the mutex.
+ */
+void i915_vma_resource_bind_dep_sync_all(struct i915_address_space *vm)
+{
+	struct i915_vma_resource *node;
+	struct dma_fence *fence;
+
+	do {
+		fence = NULL;
+		mutex_lock(&vm->mutex);
+		node = vma_res_itree_iter_first(&vm->pending_unbind, 0,
+						U64_MAX);
+		if (node)
+			fence = dma_fence_get_rcu(&node->unbind_fence);
+		mutex_unlock(&vm->mutex);
+
+		if (fence) {
+			/*
+			 * The wait makes sure the node eventually removes
+			 * itself from the tree.
+			 */
+			dma_fence_wait(fence, false);
+			dma_fence_put(fence);
+		}
+	} while (node);
+}
+
+/**
+ * i915_vma_resource_bind_dep_await - Have a struct i915_sw_fence await all
+ * pending unbinds in a certain range of a vm.
+ * @vm: The vm to look at.
+ * @sw_fence: The struct i915_sw_fence that will be awaiting the unbinds.
+ * @offset: The range start.
+ * @size: The range size.
+ * @intr: Whether to wait interruptibly.
+ * @gfp: Allocation mode for memory allocations.
+ *
+ * The function makes @sw_fence await all pending unbinds in a certain
+ * vm range before calling the complete notifier. To be able to await
+ * each individual unbind, the function needs to allocate memory using
+ * the @gfp allocation mode. If that fails, the function will instead
+ * wait for the unbind fence to signal, using @intr to judge whether to
+ * wait interruptibly or not. Note that @gfp should ideally be selected
+ * so as to avoid any expensive memory allocation stalls, and rather fail
+ * and synchronize itself. For now the vm mutex is required when calling
+ * this function, which means that @gfp can't call into direct reclaim.
+ * In practice this means that during heavy memory pressure, we will sync
+ * in this function.
+ *
+ * Return: Zero on success, -ERESTARTSYS if interrupted and @intr==true
+ */
+int i915_vma_resource_bind_dep_await(struct i915_address_space *vm,
+				     struct i915_sw_fence *sw_fence,
+				     u64 offset,
+				     u64 size,
+				     bool intr,
+				     gfp_t gfp)
+{
+	struct i915_vma_resource *node;
+	u64 last = offset + size - 1;
+
+	lockdep_assert_held(&vm->mutex);
+	might_alloc(gfp);
+	might_sleep();
+
+	i915_vma_resource_color_adjust_range(vm, &offset, &last);
+	node = vma_res_itree_iter_first(&vm->pending_unbind, offset, last);
+	while (node) {
+		int ret;
+
+		ret = i915_sw_fence_await_dma_fence(sw_fence,
+						    &node->unbind_fence,
+						    0, gfp);
+		if (ret < 0) {
+			ret = dma_fence_wait(&node->unbind_fence, intr);
+			if (ret)
+				return ret;
+		}
+
+		node = vma_res_itree_iter_next(node, offset, last);
+	}
+
+	return 0;
+}
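+
+/*
+ * Callers typically pick a non-blocking @gfp so that an allocation
+ * failure falls back to the synchronous dma_fence_wait() above rather
+ * than stalling in reclaim; i915_vma_bind(), for example, passes
+ * GFP_NOWAIT | __GFP_RETRY_MAYFAIL | __GFP_NOWARN.
+ */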
+
+void i915_vma_resource_module_exit(void)
+{
+	kmem_cache_destroy(slab_vma_resources);
+}
+
+int __init i915_vma_resource_module_init(void)
+{
+	slab_vma_resources = KMEM_CACHE(i915_vma_resource, SLAB_HWCACHE_ALIGN);
+	if (!slab_vma_resources)
+		return -ENOMEM;
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/i915/i915_vma_resource.h b/drivers/gpu/drm/i915/i915_vma_resource.h
new file mode 100644
index 0000000000000000000000000000000000000000..25913913baa64f121bda8760bedbcbaca436dfac
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_vma_resource.h
@@ -0,0 +1,234 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#ifndef __I915_VMA_RESOURCE_H__
+#define __I915_VMA_RESOURCE_H__
+
+#include <linux/dma-fence.h>
+#include <linux/refcount.h>
+
+#include "i915_gem.h"
+#include "i915_scatterlist.h"
+#include "i915_sw_fence.h"
+#include "intel_runtime_pm.h"
+
+struct intel_memory_region;
+
+struct i915_page_sizes {
+	/**
+	 * The sg mask of the pages sg_table, i.e. the mask of
+	 * the lengths for each sg entry.
+	 */
+	unsigned int phys;
+
+	/**
+	 * The gtt page sizes we are allowed to use given the
+	 * sg mask and the supported page sizes. This will
+	 * express the smallest unit we can use for the whole
+	 * object, as well as the larger sizes we may be able
+	 * to use opportunistically.
+	 */
+	unsigned int sg;
+};
+
+/**
+ * struct i915_vma_resource - Snapshotted unbind information.
+ * @unbind_fence: Fence to mark unbinding complete. Note that this fence
+ * is not considered published until unbind is scheduled, and as such it
+ * is illegal to access this fence before scheduled unbind other than
+ * for refcounting.
+ * @lock: The @unbind_fence lock.
+ * @hold_count: Number of holders blocking the fence from finishing.
+ * The vma itself is keeping a hold, which is released when unbind
+ * is scheduled.
+ * @work: Work struct for deferred unbind work.
+ * @chain: Pointer to struct i915_sw_fence used to await dependencies.
+ * @rb: Rb node for the vm's pending unbind interval tree.
+ * @__subtree_last: Interval tree private member.
+ * @vm: non-refcounted pointer to the vm. This is for internal use only and
+ * this member is cleared after vma_resource unbind.
+ * @mr: The memory region of the object pointed to by the vma.
+ * @ops: Pointer to the backend i915_vma_ops.
+ * @private: Bind backend private info.
+ * @start: Offset into the address space of bind range start.
+ * @node_size: Size of the allocated range manager node.
+ * @vma_size: Bind size.
+ * @page_sizes_gtt: Resulting page sizes from the bind operation.
+ * @bound_flags: Flags indicating binding status.
+ * @allocated: Backend private data. TODO: Should move into @private.
+ * @immediate_unbind: Unbind can be done immediately and doesn't need to be
+ * deferred to a work item awaiting unsignaled fences. This is a hack.
+ * (dma_fence_work uses a fence flag for this, but this seems slightly
+ * cleaner).
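+ * @needs_wakeref: Whether a runtime pm wakeref is needed during unbind.
+ * Protected by the vm mutex.
+ * @wakeref: The runtime pm wakeref obtained when @needs_wakeref is set,
+ * released once the unbind fence signals.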
+ *
+ * The lifetime of a struct i915_vma_resource is from a binding request to
+ * the actual possible asynchronous unbind has completed.
+ */
+struct i915_vma_resource {
+	struct dma_fence unbind_fence;
+	/* See above for description of the lock. */
+	spinlock_t lock;
+	refcount_t hold_count;
+	struct work_struct work;
+	struct i915_sw_fence chain;
+	struct rb_node rb;
+	u64 __subtree_last;
+	struct i915_address_space *vm;
+	intel_wakeref_t wakeref;
+
+	/**
+	 * struct i915_vma_bindinfo - Information needed for async bind
+	 * only but that can be dropped after the bind has taken place.
+	 * Consider making this a separate argument to the bind_vma
+	 * op, coalescing with other arguments like vm, stash, cache_level
+	 * and flags.
+	 * @pages: The pages sg-table.
+	 * @page_sizes: Page sizes of the pages.
+	 * @pages_rsgt: Refcounted sg-table when delayed object destruction
+	 * is supported. May be NULL.
+	 * @readonly: Whether the vma should be bound read-only.
+	 * @lmem: Whether the vma points to lmem.
+	 */
+	struct i915_vma_bindinfo {
+		struct sg_table *pages;
+		struct i915_page_sizes page_sizes;
+		struct i915_refct_sgt *pages_rsgt;
+		bool readonly:1;
+		bool lmem:1;
+	} bi;
+
+#if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR)
+	struct intel_memory_region *mr;
+#endif
+	const struct i915_vma_ops *ops;
+	void *private;
+	u64 start;
+	u64 node_size;
+	u64 vma_size;
+	u32 page_sizes_gtt;
+
+	u32 bound_flags;
+	bool allocated:1;
+	bool immediate_unbind:1;
+	bool needs_wakeref:1;
+};
+
+bool i915_vma_resource_hold(struct i915_vma_resource *vma_res,
+			    bool *lockdep_cookie);
+
+void i915_vma_resource_unhold(struct i915_vma_resource *vma_res,
+			      bool lockdep_cookie);
+
+struct i915_vma_resource *i915_vma_resource_alloc(void);
+
+void i915_vma_resource_free(struct i915_vma_resource *vma_res);
+
+struct dma_fence *i915_vma_resource_unbind(struct i915_vma_resource *vma_res);
+
+void __i915_vma_resource_init(struct i915_vma_resource *vma_res);
+
+/**
+ * i915_vma_resource_get - Take a reference on a vma resource
+ * @vma_res: The vma resource on which to take a reference.
+ *
+ * Return: The @vma_res pointer
+ */
+static inline struct i915_vma_resource
+*i915_vma_resource_get(struct i915_vma_resource *vma_res)
+{
+	dma_fence_get(&vma_res->unbind_fence);
+	return vma_res;
+}
+
+/**
+ * i915_vma_resource_put - Release a reference to a struct i915_vma_resource
+ * @vma_res: The resource
+ */
+static inline void i915_vma_resource_put(struct i915_vma_resource *vma_res)
+{
+	dma_fence_put(&vma_res->unbind_fence);
+}
+
+/**
+ * i915_vma_resource_init - Initialize a vma resource.
+ * @vma_res: The vma resource to initialize
+ * @vm: Pointer to the vm.
+ * @pages: The pages sg-table.
+ * @page_sizes: Page sizes of the pages.
+ * @pages_rsgt: Pointer to a struct i915_refct_sgt of an object with
+ * delayed destruction.
+ * @readonly: Whether the vma should be bound read-only.
+ * @lmem: Whether the vma points to lmem.
+ * @mr: The memory region of the object the vma points to.
+ * @ops: The backend ops.
+ * @private: Bind backend private info.
+ * @start: Offset into the address space of bind range start.
+ * @node_size: Size of the allocated range manager node.
+ * @size: Bind size.
+ *
+ * Initializes a vma resource allocated using i915_vma_resource_alloc().
+ * The reason for having separate allocate and initialize functions is that
+ * initialization may need to be performed from under a lock where
+ * allocation is not allowed.
+ */
+static inline void i915_vma_resource_init(struct i915_vma_resource *vma_res,
+					  struct i915_address_space *vm,
+					  struct sg_table *pages,
+					  const struct i915_page_sizes *page_sizes,
+					  struct i915_refct_sgt *pages_rsgt,
+					  bool readonly,
+					  bool lmem,
+					  struct intel_memory_region *mr,
+					  const struct i915_vma_ops *ops,
+					  void *private,
+					  u64 start,
+					  u64 node_size,
+					  u64 size)
+{
+	__i915_vma_resource_init(vma_res);
+	vma_res->vm = vm;
+	vma_res->bi.pages = pages;
+	vma_res->bi.page_sizes = *page_sizes;
+	if (pages_rsgt)
+		vma_res->bi.pages_rsgt = i915_refct_sgt_get(pages_rsgt);
+	vma_res->bi.readonly = readonly;
+	vma_res->bi.lmem = lmem;
+#if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR)
+	vma_res->mr = mr;
+#endif
+	vma_res->ops = ops;
+	vma_res->private = private;
+	vma_res->start = start;
+	vma_res->node_size = node_size;
+	vma_res->vma_size = size;
+}
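+
+/*
+ * A sketch of typical use (argument values are illustrative): allocate
+ * up front, since allocation is not allowed under the vm mutex, and
+ * initialize once the mutex is held:
+ *
+ *	vma_res = i915_vma_resource_alloc();
+ *	...
+ *	mutex_lock(&vm->mutex);
+ *	i915_vma_resource_init(vma_res, vm, pages, &page_sizes, rsgt,
+ *			       false, false, mr, ops, NULL,
+ *			       node.start, node.size, size);
+ */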
+
+static inline void i915_vma_resource_fini(struct i915_vma_resource *vma_res)
+{
+	GEM_BUG_ON(refcount_read(&vma_res->hold_count) != 1);
+	if (vma_res->bi.pages_rsgt)
+		i915_refct_sgt_put(vma_res->bi.pages_rsgt);
+	i915_sw_fence_fini(&vma_res->chain);
+}
+
+int i915_vma_resource_bind_dep_sync(struct i915_address_space *vm,
+				    u64 offset,
+				    u64 size,
+				    bool intr);
+
+int i915_vma_resource_bind_dep_await(struct i915_address_space *vm,
+				     struct i915_sw_fence *sw_fence,
+				     u64 offset,
+				     u64 size,
+				     bool intr,
+				     gfp_t gfp);
+
+void i915_vma_resource_bind_dep_sync_all(struct i915_address_space *vm);
+
+void i915_vma_resource_module_exit(void);
+
+int i915_vma_resource_module_init(void);
+
+#endif
diff --git a/drivers/gpu/drm/i915/i915_vma_snapshot.c b/drivers/gpu/drm/i915/i915_vma_snapshot.c
deleted file mode 100644
index 2949ceea9884b2d4ed6e17ee9904df8355d33957..0000000000000000000000000000000000000000
--- a/drivers/gpu/drm/i915/i915_vma_snapshot.c
+++ /dev/null
@@ -1,134 +0,0 @@
-// SPDX-License-Identifier: MIT
-/*
- * Copyright © 2021 Intel Corporation
- */
-
-#include "i915_vma_snapshot.h"
-#include "i915_vma_types.h"
-#include "i915_vma.h"
-
-/**
- * i915_vma_snapshot_init - Initialize a struct i915_vma_snapshot from
- * a struct i915_vma.
- * @vsnap: The i915_vma_snapshot to init.
- * @vma: A struct i915_vma used to initialize @vsnap.
- * @name: Name associated with the snapshot. The character pointer needs to
- * stay alive over the lifitime of the shapsot
- */
-void i915_vma_snapshot_init(struct i915_vma_snapshot *vsnap,
-			    struct i915_vma *vma,
-			    const char *name)
-{
-	if (!i915_vma_is_pinned(vma))
-		assert_object_held(vma->obj);
-
-	vsnap->name = name;
-	vsnap->size = vma->size;
-	vsnap->obj_size = vma->obj->base.size;
-	vsnap->gtt_offset = vma->node.start;
-	vsnap->gtt_size = vma->node.size;
-	vsnap->page_sizes = vma->page_sizes.gtt;
-	vsnap->pages = vma->pages;
-	vsnap->pages_rsgt = NULL;
-	vsnap->mr = NULL;
-	if (vma->obj->mm.rsgt)
-		vsnap->pages_rsgt = i915_refct_sgt_get(vma->obj->mm.rsgt);
-	vsnap->mr = vma->obj->mm.region;
-	kref_init(&vsnap->kref);
-	vsnap->vma_resource = &vma->active;
-	vsnap->onstack = false;
-	vsnap->present = true;
-}
-
-/**
- * i915_vma_snapshot_init_onstack - Initialize a struct i915_vma_snapshot from
- * a struct i915_vma, but avoid kfreeing it on last put.
- * @vsnap: The i915_vma_snapshot to init.
- * @vma: A struct i915_vma used to initialize @vsnap.
- * @name: Name associated with the snapshot. The character pointer needs to
- * stay alive over the lifitime of the shapsot
- */
-void i915_vma_snapshot_init_onstack(struct i915_vma_snapshot *vsnap,
-				    struct i915_vma *vma,
-				    const char *name)
-{
-	i915_vma_snapshot_init(vsnap, vma, name);
-	vsnap->onstack = true;
-}
-
-static void vma_snapshot_release(struct kref *ref)
-{
-	struct i915_vma_snapshot *vsnap =
-		container_of(ref, typeof(*vsnap), kref);
-
-	vsnap->present = false;
-	if (vsnap->pages_rsgt)
-		i915_refct_sgt_put(vsnap->pages_rsgt);
-	if (!vsnap->onstack)
-		kfree(vsnap);
-}
-
-/**
- * i915_vma_snapshot_put - Put an i915_vma_snapshot pointer reference
- * @vsnap: The pointer reference
- */
-void i915_vma_snapshot_put(struct i915_vma_snapshot *vsnap)
-{
-	kref_put(&vsnap->kref, vma_snapshot_release);
-}
-
-/**
- * i915_vma_snapshot_put_onstack - Put an onstcak i915_vma_snapshot pointer
- * reference and varify that the structure is released
- * @vsnap: The pointer reference
- *
- * This function is intended to be paired with a i915_vma_init_onstack()
- * and should be called before exiting the scope that declared or
- * freeing the structure that embedded @vsnap to verify that all references
- * have been released.
- */
-void i915_vma_snapshot_put_onstack(struct i915_vma_snapshot *vsnap)
-{
-	if (!kref_put(&vsnap->kref, vma_snapshot_release))
-		GEM_BUG_ON(1);
-}
-
-/**
- * i915_vma_snapshot_resource_pin - Temporarily block the memory the
- * vma snapshot is pointing to from being released.
- * @vsnap: The vma snapshot.
- * @lockdep_cookie: Pointer to bool needed for lockdep support. This needs
- * to be passed to the paired i915_vma_snapshot_resource_unpin.
- *
- * This function will temporarily try to hold up a fence or similar structure
- * and will therefore enter a fence signaling critical section.
- *
- * Return: true if we succeeded in blocking the memory from being released,
- * false otherwise.
- */
-bool i915_vma_snapshot_resource_pin(struct i915_vma_snapshot *vsnap,
-				    bool *lockdep_cookie)
-{
-	bool pinned = i915_active_acquire_if_busy(vsnap->vma_resource);
-
-	if (pinned)
-		*lockdep_cookie = dma_fence_begin_signalling();
-
-	return pinned;
-}
-
-/**
- * i915_vma_snapshot_resource_unpin - Unblock vma snapshot memory from
- * being released.
- * @vsnap: The vma snapshot.
- * @lockdep_cookie: Cookie returned from matching i915_vma_resource_pin().
- *
- * Might leave a fence signalling critical section and signal a fence.
- */
-void i915_vma_snapshot_resource_unpin(struct i915_vma_snapshot *vsnap,
-				      bool lockdep_cookie)
-{
-	dma_fence_end_signalling(lockdep_cookie);
-
-	return i915_active_release(vsnap->vma_resource);
-}
diff --git a/drivers/gpu/drm/i915/i915_vma_snapshot.h b/drivers/gpu/drm/i915/i915_vma_snapshot.h
deleted file mode 100644
index 940581df4622b023441427e39b13ddd4d4784a23..0000000000000000000000000000000000000000
--- a/drivers/gpu/drm/i915/i915_vma_snapshot.h
+++ /dev/null
@@ -1,112 +0,0 @@
-/* SPDX-License-Identifier: MIT */
-/*
- * Copyright © 2021 Intel Corporation
- */
-#ifndef _I915_VMA_SNAPSHOT_H_
-#define _I915_VMA_SNAPSHOT_H_
-
-#include <linux/kref.h>
-#include <linux/slab.h>
-#include <linux/types.h>
-
-struct i915_active;
-struct i915_refct_sgt;
-struct i915_vma;
-struct intel_memory_region;
-struct sg_table;
-
-/**
- * DOC: Simple utilities for snapshotting GPU vma metadata, later used for
- * error capture. Vi use a separate header for this to avoid issues due to
- * recursive header includes.
- */
-
-/**
- * struct i915_vma_snapshot - Snapshot of vma metadata.
- * @size: The vma size in bytes.
- * @obj_size: The size of the underlying object in bytes.
- * @gtt_offset: The gtt offset the vma is bound to.
- * @gtt_size: The size in bytes allocated for the vma in the GTT.
- * @pages: The struct sg_table pointing to the pages bound.
- * @pages_rsgt: The refcounted sg_table holding the reference for @pages if any.
- * @mr: The memory region pointed for the pages bound.
- * @kref: Reference for this structure.
- * @vma_resource: FIXME: A means to keep the unbind fence from signaling.
- * Temporarily while we have only sync unbinds, and still use the vma
- * active, we use that. With async unbinding we need a signaling refcount
- * for the unbind fence.
- * @page_sizes: The vma GTT page sizes information.
- * @onstack: Whether the structure shouldn't be freed on final put.
- * @present: Whether the structure is present and initialized.
- */
-struct i915_vma_snapshot {
-	const char *name;
-	size_t size;
-	size_t obj_size;
-	size_t gtt_offset;
-	size_t gtt_size;
-	struct sg_table *pages;
-	struct i915_refct_sgt *pages_rsgt;
-	struct intel_memory_region *mr;
-	struct kref kref;
-	struct i915_active *vma_resource;
-	u32 page_sizes;
-	bool onstack:1;
-	bool present:1;
-};
-
-void i915_vma_snapshot_init(struct i915_vma_snapshot *vsnap,
-			    struct i915_vma *vma,
-			    const char *name);
-
-void i915_vma_snapshot_init_onstack(struct i915_vma_snapshot *vsnap,
-				    struct i915_vma *vma,
-				    const char *name);
-
-void i915_vma_snapshot_put(struct i915_vma_snapshot *vsnap);
-
-void i915_vma_snapshot_put_onstack(struct i915_vma_snapshot *vsnap);
-
-bool i915_vma_snapshot_resource_pin(struct i915_vma_snapshot *vsnap,
-				    bool *lockdep_cookie);
-
-void i915_vma_snapshot_resource_unpin(struct i915_vma_snapshot *vsnap,
-				      bool lockdep_cookie);
-
-/**
- * i915_vma_snapshot_alloc - Allocate a struct i915_vma_snapshot
- * @gfp: Allocation mode.
- *
- * Return: A pointer to a struct i915_vma_snapshot if successful.
- * NULL otherwise.
- */
-static inline struct i915_vma_snapshot *i915_vma_snapshot_alloc(gfp_t gfp)
-{
-	return kmalloc(sizeof(struct i915_vma_snapshot), gfp);
-}
-
-/**
- * i915_vma_snapshot_get - Take a reference on a struct i915_vma_snapshot
- *
- * Return: A pointer to a struct i915_vma_snapshot.
- */
-static inline struct i915_vma_snapshot *
-i915_vma_snapshot_get(struct i915_vma_snapshot *vsnap)
-{
-	kref_get(&vsnap->kref);
-	return vsnap;
-}
-
-/**
- * i915_vma_snapshot_present - Whether a struct i915_vma_snapshot is
- * present and initialized.
- *
- * Return: true if present and initialized; false otherwise.
- */
-static inline bool
-i915_vma_snapshot_present(const struct i915_vma_snapshot *vsnap)
-{
-	return vsnap && vsnap->present;
-}
-
-#endif
diff --git a/drivers/gpu/drm/i915/i915_vma_types.h b/drivers/gpu/drm/i915/i915_vma_types.h
index ca575e129ced61f1ee3b41ff0787b98a1541e389..88370dadca820ecfa3d139936183b233fae7e898 100644
--- a/drivers/gpu/drm/i915/i915_vma_types.h
+++ b/drivers/gpu/drm/i915/i915_vma_types.h
@@ -95,6 +95,8 @@ enum i915_cache_level;
  *
  */
 
+struct i915_vma_resource;
+
 struct intel_remapped_plane_info {
 	/* in gtt pages */
 	u32 offset:31;
@@ -247,22 +249,20 @@ struct i915_vma {
 
 #define I915_VMA_BIND_MASK (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND)
 
-#define I915_VMA_ALLOC_BIT	12
-
-#define I915_VMA_ERROR_BIT	13
+#define I915_VMA_ERROR_BIT	12
 #define I915_VMA_ERROR		((int)BIT(I915_VMA_ERROR_BIT))
 
-#define I915_VMA_GGTT_BIT	14
-#define I915_VMA_CAN_FENCE_BIT	15
-#define I915_VMA_USERFAULT_BIT	16
-#define I915_VMA_GGTT_WRITE_BIT	17
+#define I915_VMA_GGTT_BIT	13
+#define I915_VMA_CAN_FENCE_BIT	14
+#define I915_VMA_USERFAULT_BIT	15
+#define I915_VMA_GGTT_WRITE_BIT	16
 
 #define I915_VMA_GGTT		((int)BIT(I915_VMA_GGTT_BIT))
 #define I915_VMA_CAN_FENCE	((int)BIT(I915_VMA_CAN_FENCE_BIT))
 #define I915_VMA_USERFAULT	((int)BIT(I915_VMA_USERFAULT_BIT))
 #define I915_VMA_GGTT_WRITE	((int)BIT(I915_VMA_GGTT_WRITE_BIT))
 
-#define I915_VMA_SCANOUT_BIT	18
+#define I915_VMA_SCANOUT_BIT	17
 #define I915_VMA_SCANOUT	((int)BIT(I915_VMA_SCANOUT_BIT))
 
 	struct i915_active active;
@@ -291,6 +291,9 @@ struct i915_vma {
 	struct list_head evict_link;
 
 	struct list_head closed_link;
+
+	/** The async vma resource. Protected by the vm_mutex */
+	struct i915_vma_resource *resource;
 };
 
 #endif
diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h
index 3699b1c539eab8ccd5806147f379ec8982170e3a..27dcfe6f24290f9faefdf2daa2092abf041698db 100644
--- a/drivers/gpu/drm/i915/intel_device_info.h
+++ b/drivers/gpu/drm/i915/intel_device_info.h
@@ -96,7 +96,7 @@ enum intel_platform {
  * it is fine for the same bit to be used on multiple parent platforms.
  */
 
-#define INTEL_SUBPLATFORM_BITS (2)
+#define INTEL_SUBPLATFORM_BITS (3)
 #define INTEL_SUBPLATFORM_MASK (BIT(INTEL_SUBPLATFORM_BITS) - 1)
 
 /* HSW/BDW/SKL/KBL/CFL */
@@ -109,6 +109,7 @@ enum intel_platform {
 /* DG2 */
 #define INTEL_SUBPLATFORM_G10	0
 #define INTEL_SUBPLATFORM_G11	1
+#define INTEL_SUBPLATFORM_G12	2
 
 /* ADL-S */
 #define INTEL_SUBPLATFORM_RPL_S	0
@@ -134,6 +135,7 @@ enum intel_ppgtt_type {
 	func(has_reset_engine); \
 	func(has_global_mocs); \
 	func(has_gt_uc); \
+	func(has_guc_deprivilege); \
 	func(has_l3_dpf); \
 	func(has_llc); \
 	func(has_logical_ring_contexts); \
diff --git a/drivers/gpu/drm/i915/intel_dram.c b/drivers/gpu/drm/i915/intel_dram.c
index 723bd0411a0e7be007b0b781482c452ec155757a..174c95c3e10f8e747adfb7ccb47d9549ef90e042 100644
--- a/drivers/gpu/drm/i915/intel_dram.c
+++ b/drivers/gpu/drm/i915/intel_dram.c
@@ -6,6 +6,7 @@
 #include "i915_drv.h"
 #include "i915_reg.h"
 #include "intel_dram.h"
+#include "intel_mchbar_regs.h"
 #include "intel_pcode.h"
 
 struct dram_dimm_info {
diff --git a/drivers/gpu/drm/i915/intel_mchbar_regs.h b/drivers/gpu/drm/i915/intel_mchbar_regs.h
new file mode 100644
index 0000000000000000000000000000000000000000..2aad2f0cc8db6bed719f114adc928169630b5ffc
--- /dev/null
+++ b/drivers/gpu/drm/i915/intel_mchbar_regs.h
@@ -0,0 +1,228 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef __INTEL_MCHBAR_REGS__
+#define __INTEL_MCHBAR_REGS__
+
+#include "i915_reg_defs.h"
+
+/*
+ * MCHBAR mirror.
+ *
+ * This mirrors the MCHBAR MMIO space whose location is determined by
+ * device 0 function 0's pci config register 0x44 or 0x48 and matches it in
+ * every way.  It is not accessible from the CP register read instructions.
+ *
+ * Starting from Haswell, you can't write registers using the MCHBAR mirror,
+ * just read.
+ */
+
+#define MCHBAR_MIRROR_BASE			0x10000
+#define MCHBAR_MIRROR_BASE_SNB			0x140000
+
+#define CTG_STOLEN_RESERVED			_MMIO(MCHBAR_MIRROR_BASE + 0x34)
+#define ELK_STOLEN_RESERVED			_MMIO(MCHBAR_MIRROR_BASE + 0x48)
+#define   G4X_STOLEN_RESERVED_ADDR1_MASK	(0xFFFF << 16)
+#define   G4X_STOLEN_RESERVED_ADDR2_MASK	(0xFFF << 4)
+#define   G4X_STOLEN_RESERVED_ENABLE		(1 << 0)
+
+/* Pineview MCH register contains DDR3 setting */
+#define CSHRDDR3CTL				_MMIO(MCHBAR_MIRROR_BASE + 0x1a8)
+#define   CSHRDDR3CTL_DDR3			(1 << 2)
+
+/* 915-945 and GM965 MCH register controlling DRAM channel access */
+#define DCC					_MMIO(MCHBAR_MIRROR_BASE + 0x200)
+#define   DCC_ADDRESSING_MODE_SINGLE_CHANNEL	(0 << 0)
+#define   DCC_ADDRESSING_MODE_DUAL_CHANNEL_ASYMMETRIC	(1 << 0)
+#define   DCC_ADDRESSING_MODE_DUAL_CHANNEL_INTERLEAVED	(2 << 0)
+#define   DCC_ADDRESSING_MODE_MASK		(3 << 0)
+#define   DCC_CHANNEL_XOR_DISABLE		(1 << 10)
+#define   DCC_CHANNEL_XOR_BIT_17		(1 << 9)
+#define DCC2					_MMIO(MCHBAR_MIRROR_BASE + 0x204)
+#define   DCC2_MODIFIED_ENHANCED_DISABLE	(1 << 20)
+
+/* 965 MCH register controlling DRAM channel configuration */
+#define C0DRB3_BW				_MMIO(MCHBAR_MIRROR_BASE + 0x206)
+#define C1DRB3_BW				_MMIO(MCHBAR_MIRROR_BASE + 0x606)
+
+/* Clocking configuration register */
+#define CLKCFG					_MMIO(MCHBAR_MIRROR_BASE + 0xc00)
+#define CLKCFG_FSB_400				(0 << 0)	/* hrawclk 100 */
+#define CLKCFG_FSB_400_ALT			(5 << 0)	/* hrawclk 100 */
+#define CLKCFG_FSB_533				(1 << 0)	/* hrawclk 133 */
+#define CLKCFG_FSB_667				(3 << 0)	/* hrawclk 166 */
+#define CLKCFG_FSB_800				(2 << 0)	/* hrawclk 200 */
+#define CLKCFG_FSB_1067				(6 << 0)	/* hrawclk 266 */
+#define CLKCFG_FSB_1067_ALT			(0 << 0)	/* hrawclk 266 */
+#define CLKCFG_FSB_1333				(7 << 0)	/* hrawclk 333 */
+#define CLKCFG_FSB_1333_ALT			(4 << 0)	/* hrawclk 333 */
+#define CLKCFG_FSB_1600_ALT			(6 << 0)	/* hrawclk 400 */
+#define CLKCFG_FSB_MASK				(7 << 0)
+#define CLKCFG_MEM_533				(1 << 4)
+#define CLKCFG_MEM_667				(2 << 4)
+#define CLKCFG_MEM_800				(3 << 4)
+#define CLKCFG_MEM_MASK				(7 << 4)
+
+#define HPLLVCO_MOBILE				_MMIO(MCHBAR_MIRROR_BASE + 0xc0f)
+#define HPLLVCO					_MMIO(MCHBAR_MIRROR_BASE + 0xc38)
+
+#define TSC1					_MMIO(MCHBAR_MIRROR_BASE + 0x1001)
+#define   TSE					(1 << 0)
+#define TR1					_MMIO(MCHBAR_MIRROR_BASE + 0x1006)
+#define TSFS					_MMIO(MCHBAR_MIRROR_BASE + 0x1020)
+#define   TSFS_SLOPE_MASK			0x0000ff00
+#define   TSFS_SLOPE_SHIFT			8
+#define   TSFS_INTR_MASK			0x000000ff
+
+/* Memory latency timer register */
+#define MLTR_ILK				_MMIO(MCHBAR_MIRROR_BASE + 0x1222)
+/* Memory self-refresh latency is expressed in units of 0.5 us */
+#define   MLTR_WM2_MASK				REG_GENMASK(13, 8)
+#define   MLTR_WM1_MASK				REG_GENMASK(5, 0)
+
+#define CSIPLL0					_MMIO(MCHBAR_MIRROR_BASE + 0x2c10)
+#define DDRMPLL1				_MMIO(MCHBAR_MIRROR_BASE + 0x2c20)
+
+#define ILK_GDSR				_MMIO(MCHBAR_MIRROR_BASE + 0x2ca4)
+#define  ILK_GRDOM_FULL				(0 << 1)
+#define  ILK_GRDOM_RENDER			(1 << 1)
+#define  ILK_GRDOM_MEDIA			(3 << 1)
+#define  ILK_GRDOM_MASK				(3 << 1)
+#define  ILK_GRDOM_RESET_ENABLE			(1 << 0)
+
+#define BXT_D_CR_DRP0_DUNIT8			0x1000
+#define BXT_D_CR_DRP0_DUNIT9			0x1200
+#define   BXT_D_CR_DRP0_DUNIT_START		8
+#define   BXT_D_CR_DRP0_DUNIT_END		11
+#define BXT_D_CR_DRP0_DUNIT(x)			_MMIO(MCHBAR_MIRROR_BASE_SNB + \
+						      _PICK_EVEN((x) - 8, BXT_D_CR_DRP0_DUNIT8,\
+								 BXT_D_CR_DRP0_DUNIT9))
+#define   BXT_DRAM_RANK_MASK			0x3
+#define   BXT_DRAM_RANK_SINGLE			0x1
+#define   BXT_DRAM_RANK_DUAL			0x3
+#define   BXT_DRAM_WIDTH_MASK			(0x3 << 4)
+#define   BXT_DRAM_WIDTH_SHIFT			4
+#define   BXT_DRAM_WIDTH_X8			(0x0 << 4)
+#define   BXT_DRAM_WIDTH_X16			(0x1 << 4)
+#define   BXT_DRAM_WIDTH_X32			(0x2 << 4)
+#define   BXT_DRAM_WIDTH_X64			(0x3 << 4)
+#define   BXT_DRAM_SIZE_MASK			(0x7 << 6)
+#define   BXT_DRAM_SIZE_SHIFT			6
+#define   BXT_DRAM_SIZE_4GBIT			(0x0 << 6)
+#define   BXT_DRAM_SIZE_6GBIT			(0x1 << 6)
+#define   BXT_DRAM_SIZE_8GBIT			(0x2 << 6)
+#define   BXT_DRAM_SIZE_12GBIT			(0x3 << 6)
+#define   BXT_DRAM_SIZE_16GBIT			(0x4 << 6)
+#define   BXT_DRAM_TYPE_MASK			(0x7 << 22)
+#define   BXT_DRAM_TYPE_SHIFT			22
+#define   BXT_DRAM_TYPE_DDR3			(0x0 << 22)
+#define   BXT_DRAM_TYPE_LPDDR3			(0x1 << 22)
+#define   BXT_DRAM_TYPE_LPDDR4			(0x2 << 22)
+#define   BXT_DRAM_TYPE_DDR4			(0x4 << 22)
+
+#define MCHBAR_CH0_CR_TC_PRE_0_0_0_MCHBAR	_MMIO(MCHBAR_MIRROR_BASE_SNB + 0x4000)
+#define   DG1_DRAM_T_RDPRE_MASK			REG_GENMASK(16, 11)
+#define   DG1_DRAM_T_RP_MASK			REG_GENMASK(6, 0)
+#define MCHBAR_CH0_CR_TC_PRE_0_0_0_MCHBAR_HIGH	_MMIO(MCHBAR_MIRROR_BASE_SNB + 0x4004)
+#define   DG1_DRAM_T_RCD_MASK			REG_GENMASK(15, 9)
+#define   DG1_DRAM_T_RAS_MASK			REG_GENMASK(8, 1)
+
+#define SKL_MAD_INTER_CHANNEL_0_0_0_MCHBAR_MCMAIN	_MMIO(MCHBAR_MIRROR_BASE_SNB + 0x5000)
+#define   SKL_DRAM_DDR_TYPE_MASK		(0x3 << 0)
+#define   SKL_DRAM_DDR_TYPE_DDR4		(0 << 0)
+#define   SKL_DRAM_DDR_TYPE_DDR3		(1 << 0)
+#define   SKL_DRAM_DDR_TYPE_LPDDR3		(2 << 0)
+#define   SKL_DRAM_DDR_TYPE_LPDDR4		(3 << 0)
+
+/* snb MCH registers for reading the DRAM channel configuration */
+#define MAD_DIMM_C0				_MMIO(MCHBAR_MIRROR_BASE_SNB + 0x5004)
+#define MAD_DIMM_C1				_MMIO(MCHBAR_MIRROR_BASE_SNB + 0x5008)
+#define MAD_DIMM_C2				_MMIO(MCHBAR_MIRROR_BASE_SNB + 0x500C)
+#define   MAD_DIMM_ECC_MASK			(0x3 << 24)
+#define   MAD_DIMM_ECC_OFF			(0x0 << 24)
+#define   MAD_DIMM_ECC_IO_ON_LOGIC_OFF		(0x1 << 24)
+#define   MAD_DIMM_ECC_IO_OFF_LOGIC_ON		(0x2 << 24)
+#define   MAD_DIMM_ECC_ON			(0x3 << 24)
+#define   MAD_DIMM_ENH_INTERLEAVE		(0x1 << 22)
+#define   MAD_DIMM_RANK_INTERLEAVE		(0x1 << 21)
+#define   MAD_DIMM_B_WIDTH_X16			(0x1 << 20) /* X8 chips if unset */
+#define   MAD_DIMM_A_WIDTH_X16			(0x1 << 19) /* X8 chips if unset */
+#define   MAD_DIMM_B_DUAL_RANK			(0x1 << 18)
+#define   MAD_DIMM_A_DUAL_RANK			(0x1 << 17)
+#define   MAD_DIMM_A_SELECT			(0x1 << 16)
+/* DIMM sizes are in multiples of 256 MB. */
+#define   MAD_DIMM_B_SIZE_SHIFT			8
+#define   MAD_DIMM_B_SIZE_MASK			(0xff << MAD_DIMM_B_SIZE_SHIFT)
+#define   MAD_DIMM_A_SIZE_SHIFT			0
+#define   MAD_DIMM_A_SIZE_MASK			(0xff << MAD_DIMM_A_SIZE_SHIFT)
+
+#define SKL_MAD_DIMM_CH0_0_0_0_MCHBAR_MCMAIN	_MMIO(MCHBAR_MIRROR_BASE_SNB + 0x500C)
+#define SKL_MAD_DIMM_CH1_0_0_0_MCHBAR_MCMAIN	_MMIO(MCHBAR_MIRROR_BASE_SNB + 0x5010)
+#define   SKL_DRAM_S_SHIFT			16
+#define   SKL_DRAM_SIZE_MASK			0x3F
+#define   SKL_DRAM_WIDTH_MASK			(0x3 << 8)
+#define   SKL_DRAM_WIDTH_SHIFT			8
+#define   SKL_DRAM_WIDTH_X8			(0x0 << 8)
+#define   SKL_DRAM_WIDTH_X16			(0x1 << 8)
+#define   SKL_DRAM_WIDTH_X32			(0x2 << 8)
+#define   SKL_DRAM_RANK_MASK			(0x1 << 10)
+#define   SKL_DRAM_RANK_SHIFT			10
+#define   SKL_DRAM_RANK_1			(0x0 << 10)
+#define   SKL_DRAM_RANK_2			(0x1 << 10)
+#define   ICL_DRAM_SIZE_MASK			0x7F
+#define   ICL_DRAM_WIDTH_MASK			(0x3 << 7)
+#define   ICL_DRAM_WIDTH_SHIFT			7
+#define   ICL_DRAM_WIDTH_X8			(0x0 << 7)
+#define   ICL_DRAM_WIDTH_X16			(0x1 << 7)
+#define   ICL_DRAM_WIDTH_X32			(0x2 << 7)
+#define   ICL_DRAM_RANK_MASK			(0x3 << 9)
+#define   ICL_DRAM_RANK_SHIFT			9
+#define   ICL_DRAM_RANK_1			(0x0 << 9)
+#define   ICL_DRAM_RANK_2			(0x1 << 9)
+#define   ICL_DRAM_RANK_3			(0x2 << 9)
+#define   ICL_DRAM_RANK_4			(0x3 << 9)
+
+#define SA_PERF_STATUS_0_0_0_MCHBAR_PC		_MMIO(MCHBAR_MIRROR_BASE_SNB + 0x5918)
+#define  DG1_QCLK_RATIO_MASK			REG_GENMASK(9, 2)
+#define  DG1_QCLK_REFERENCE			REG_BIT(10)
+
+#define GEN6_GT_PERF_STATUS			_MMIO(MCHBAR_MIRROR_BASE_SNB + 0x5948)
+#define GEN6_RP_STATE_LIMITS			_MMIO(MCHBAR_MIRROR_BASE_SNB + 0x5994)
+#define GEN6_RP_STATE_CAP			_MMIO(MCHBAR_MIRROR_BASE_SNB + 0x5998)
+#define   RP0_CAP_MASK				REG_GENMASK(7, 0)
+#define   RP1_CAP_MASK				REG_GENMASK(15, 8)
+#define   RPN_CAP_MASK				REG_GENMASK(23, 16)
+
+/* snb MCH registers for priority tuning */
+#define MCH_SSKPD				_MMIO(MCHBAR_MIRROR_BASE_SNB + 0x5d10)
+#define   SSKPD_NEW_WM0_MASK_HSW		REG_GENMASK64(63, 56)
+#define   SSKPD_WM4_MASK_HSW			REG_GENMASK64(40, 32)
+#define   SSKPD_WM3_MASK_HSW			REG_GENMASK64(28, 20)
+#define   SSKPD_WM2_MASK_HSW			REG_GENMASK64(19, 12)
+#define   SSKPD_WM1_MASK_HSW			REG_GENMASK64(11, 4)
+#define   SSKPD_OLD_WM0_MASK_HSW		REG_GENMASK64(3, 0)
+#define   SSKPD_WM3_MASK_SNB			REG_GENMASK(29, 24)
+#define   SSKPD_WM2_MASK_SNB			REG_GENMASK(21, 16)
+#define   SSKPD_WM1_MASK_SNB			REG_GENMASK(13, 8)
+#define   SSKPD_WM0_MASK_SNB			REG_GENMASK(5, 0)
+
+/* Memory controller frequency in MCHBAR for Haswell (possibly SNB+) */
+#define DCLK					_MMIO(MCHBAR_MIRROR_BASE_SNB + 0x5e04)
+#define SKL_MC_BIOS_DATA_0_0_0_MCHBAR_PCU	_MMIO(MCHBAR_MIRROR_BASE_SNB + 0x5e04)
+#define   DG1_GEAR_TYPE				REG_BIT(16)
+
+/*
+ * Please see hsw_read_dcomp() and hsw_write_dcomp() before using this register,
+ * since on HSW we can't write to it using intel_uncore_write.
+ */
+#define D_COMP_HSW				_MMIO(MCHBAR_MIRROR_BASE_SNB + 0x5f0c)
+#define  D_COMP_RCOMP_IN_PROGRESS		(1 << 9)
+#define  D_COMP_COMP_FORCE			(1 << 8)
+#define  D_COMP_COMP_DISABLE			(1 << 0)
+
+#define BXT_GT_PERF_STATUS			_MMIO(MCHBAR_MIRROR_BASE_SNB + 0x7070)
+
+#endif /* __INTEL_MCHBAR_REGS__ */
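
The masks in this header are consumed through REG_FIELD_GET()/REG_FIELD_GET64()
rather than open-coded shift-and-mask pairs (see the intel_pm.c conversions
below), which keeps a field's position and width in a single definition. As a
minimal user-space sketch of that pattern -- GENMASK32()/FIELD_GET32() are
illustrative stand-ins here, not the kernel's implementations:

#include <stdint.h>
#include <stdio.h>

/* Stand-ins for the kernel's REG_GENMASK()/REG_FIELD_GET(). */
#define GENMASK32(h, l)		((~0u >> (31 - (h))) & (~0u << (l)))
#define FIELD_GET32(mask, v)	(((v) & (mask)) >> __builtin_ctz(mask))

#define WM1_MASK		GENMASK32(13, 8)  /* mirrors SSKPD_WM1_MASK_SNB */

int main(void)
{
	uint32_t sskpd = 0x00003400;	/* made-up raw register value */

	/* (0x3400 & 0x3f00) >> 8 == 0x34, i.e. 52 */
	printf("WM1 = %u\n", FIELD_GET32(WM1_MASK, sskpd));
	return 0;
}
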
diff --git a/drivers/gpu/drm/i915/intel_pch.c b/drivers/gpu/drm/i915/intel_pch.c
index da8f82c2342f4c23a092aa172b716acfd9d0a7f1..4f7a61d5502e40511dec0881bff25f92b08e35d0 100644
--- a/drivers/gpu/drm/i915/intel_pch.c
+++ b/drivers/gpu/drm/i915/intel_pch.c
@@ -130,6 +130,7 @@ intel_pch_type(const struct drm_i915_private *dev_priv, unsigned short id)
 	case INTEL_PCH_ADP_DEVICE_ID_TYPE:
 	case INTEL_PCH_ADP2_DEVICE_ID_TYPE:
 	case INTEL_PCH_ADP3_DEVICE_ID_TYPE:
+	case INTEL_PCH_ADP4_DEVICE_ID_TYPE:
 		drm_dbg_kms(&dev_priv->drm, "Found Alder Lake PCH\n");
 		drm_WARN_ON(&dev_priv->drm, !IS_ALDERLAKE_S(dev_priv) &&
 			    !IS_ALDERLAKE_P(dev_priv));
diff --git a/drivers/gpu/drm/i915/intel_pch.h b/drivers/gpu/drm/i915/intel_pch.h
index 6bff775210943a2971b3c212eae60b3aa22347a8..6fd20408f7bfe49036aef73ac81378f04ec321a4 100644
--- a/drivers/gpu/drm/i915/intel_pch.h
+++ b/drivers/gpu/drm/i915/intel_pch.h
@@ -58,6 +58,7 @@ enum intel_pch {
 #define INTEL_PCH_ADP_DEVICE_ID_TYPE		0x7A80
 #define INTEL_PCH_ADP2_DEVICE_ID_TYPE		0x5180
 #define INTEL_PCH_ADP3_DEVICE_ID_TYPE		0x7A00
+#define INTEL_PCH_ADP4_DEVICE_ID_TYPE		0x5480
 #define INTEL_PCH_P2X_DEVICE_ID_TYPE		0x7100
 #define INTEL_PCH_P3X_DEVICE_ID_TYPE		0x7000
 #define INTEL_PCH_QEMU_DEVICE_ID_TYPE		0x2900 /* qemu q35 has 2918 */
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 02084652fe3daea61b4487883f120b7961bf00ff..5af16ca4dabd6e8ebd77e41e44745e8862548f40 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -50,11 +50,16 @@
 #include "i915_drv.h"
 #include "i915_fixed.h"
 #include "i915_irq.h"
+#include "intel_mchbar_regs.h"
 #include "intel_pcode.h"
 #include "intel_pm.h"
 #include "vlv_sideband.h"
 #include "../../../platform/x86/intel_ips.h"
 
+struct drm_i915_clock_gating_funcs {
+	void (*init_clock_gating)(struct drm_i915_private *i915);
+};
+
 /* Stores plane specific WM parameters */
 struct skl_wm_params {
 	bool x_tiled, y_tiled;
@@ -2942,27 +2947,27 @@ static void intel_read_wm_latency(struct drm_i915_private *dev_priv,
 	} else if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) {
 		u64 sskpd = intel_uncore_read64(uncore, MCH_SSKPD);
 
-		wm[0] = (sskpd >> 56) & 0xFF;
+		wm[0] = REG_FIELD_GET64(SSKPD_NEW_WM0_MASK_HSW, sskpd);
 		if (wm[0] == 0)
-			wm[0] = sskpd & 0xF;
-		wm[1] = (sskpd >> 4) & 0xFF;
-		wm[2] = (sskpd >> 12) & 0xFF;
-		wm[3] = (sskpd >> 20) & 0x1FF;
-		wm[4] = (sskpd >> 32) & 0x1FF;
+			wm[0] = REG_FIELD_GET64(SSKPD_OLD_WM0_MASK_HSW, sskpd);
+		wm[1] = REG_FIELD_GET64(SSKPD_WM1_MASK_HSW, sskpd);
+		wm[2] = REG_FIELD_GET64(SSKPD_WM2_MASK_HSW, sskpd);
+		wm[3] = REG_FIELD_GET64(SSKPD_WM3_MASK_HSW, sskpd);
+		wm[4] = REG_FIELD_GET64(SSKPD_WM4_MASK_HSW, sskpd);
 	} else if (DISPLAY_VER(dev_priv) >= 6) {
 		u32 sskpd = intel_uncore_read(uncore, MCH_SSKPD);
 
-		wm[0] = (sskpd >> SSKPD_WM0_SHIFT) & SSKPD_WM_MASK;
-		wm[1] = (sskpd >> SSKPD_WM1_SHIFT) & SSKPD_WM_MASK;
-		wm[2] = (sskpd >> SSKPD_WM2_SHIFT) & SSKPD_WM_MASK;
-		wm[3] = (sskpd >> SSKPD_WM3_SHIFT) & SSKPD_WM_MASK;
+		wm[0] = REG_FIELD_GET(SSKPD_WM0_MASK_SNB, sskpd);
+		wm[1] = REG_FIELD_GET(SSKPD_WM1_MASK_SNB, sskpd);
+		wm[2] = REG_FIELD_GET(SSKPD_WM2_MASK_SNB, sskpd);
+		wm[3] = REG_FIELD_GET(SSKPD_WM3_MASK_SNB, sskpd);
 	} else if (DISPLAY_VER(dev_priv) >= 5) {
 		u32 mltr = intel_uncore_read(uncore, MLTR_ILK);
 
 		/* ILK primary LP0 latency is 700 ns */
 		wm[0] = 7;
-		wm[1] = (mltr >> MLTR_WM1_SHIFT) & ILK_SRLT_MASK;
-		wm[2] = (mltr >> MLTR_WM2_SHIFT) & ILK_SRLT_MASK;
+		wm[1] = REG_FIELD_GET(MLTR_WM1_MASK, mltr);
+		wm[2] = REG_FIELD_GET(MLTR_WM2_MASK, mltr);
 	} else {
 		MISSING_CASE(INTEL_DEVID(dev_priv));
 	}
@@ -3175,12 +3180,8 @@ static int ilk_compute_pipe_wm(struct intel_atomic_state *state,
 	}
 
 	pipe_wm->pipe_enabled = crtc_state->hw.active;
-	if (sprstate) {
-		pipe_wm->sprites_enabled = sprstate->uapi.visible;
-		pipe_wm->sprites_scaled = sprstate->uapi.visible &&
-			(drm_rect_width(&sprstate->uapi.dst) != drm_rect_width(&sprstate->uapi.src) >> 16 ||
-			 drm_rect_height(&sprstate->uapi.dst) != drm_rect_height(&sprstate->uapi.src) >> 16);
-	}
+	pipe_wm->sprites_enabled = crtc_state->active_planes & BIT(PLANE_SPRITE0);
+	pipe_wm->sprites_scaled = crtc_state->scaled_planes & BIT(PLANE_SPRITE0);
 
 	usable_level = max_level;
 
@@ -3409,29 +3410,28 @@ static void ilk_compute_wm_results(struct drm_i915_private *dev_priv,
 		 * disabled. Doing otherwise could cause underruns.
 		 */
 		results->wm_lp[wm_lp - 1] =
-			(ilk_wm_lp_latency(dev_priv, level) << WM1_LP_LATENCY_SHIFT) |
-			(r->pri_val << WM1_LP_SR_SHIFT) |
-			r->cur_val;
+			WM_LP_LATENCY(ilk_wm_lp_latency(dev_priv, level)) |
+			WM_LP_PRIMARY(r->pri_val) |
+			WM_LP_CURSOR(r->cur_val);
 
 		if (r->enable)
-			results->wm_lp[wm_lp - 1] |= WM1_LP_SR_EN;
+			results->wm_lp[wm_lp - 1] |= WM_LP_ENABLE;
 
 		if (DISPLAY_VER(dev_priv) >= 8)
-			results->wm_lp[wm_lp - 1] |=
-				r->fbc_val << WM1_LP_FBC_SHIFT_BDW;
+			results->wm_lp[wm_lp - 1] |= WM_LP_FBC_BDW(r->fbc_val);
 		else
-			results->wm_lp[wm_lp - 1] |=
-				r->fbc_val << WM1_LP_FBC_SHIFT;
+			results->wm_lp[wm_lp - 1] |= WM_LP_FBC_ILK(r->fbc_val);
+
+		results->wm_lp_spr[wm_lp - 1] = WM_LP_SPRITE(r->spr_val);
 
 		/*
-		 * Always set WM1S_LP_EN when spr_val != 0, even if the
+		 * Always set WM_LP_SPRITE_EN when spr_val != 0, even if the
 		 * level is disabled. Doing otherwise could cause underruns.
 		 */
 		if (DISPLAY_VER(dev_priv) <= 6 && r->spr_val) {
 			drm_WARN_ON(&dev_priv->drm, wm_lp != 1);
-			results->wm_lp_spr[wm_lp - 1] = WM1S_LP_EN | r->spr_val;
-		} else
-			results->wm_lp_spr[wm_lp - 1] = r->spr_val;
+			results->wm_lp_spr[wm_lp - 1] |= WM_LP_SPRITE_ENABLE;
+		}
 	}
 
 	/* LP0 register values */
@@ -3444,9 +3444,9 @@ static void ilk_compute_wm_results(struct drm_i915_private *dev_priv,
 			continue;
 
 		results->wm_pipe[pipe] =
-			(r->pri_val << WM0_PIPE_PLANE_SHIFT) |
-			(r->spr_val << WM0_PIPE_SPRITE_SHIFT) |
-			r->cur_val;
+			WM0_PIPE_PRIMARY(r->pri_val) |
+			WM0_PIPE_SPRITE(r->spr_val) |
+			WM0_PIPE_CURSOR(r->cur_val);
 	}
 }
 
@@ -3538,24 +3538,24 @@ static bool _ilk_disable_lp_wm(struct drm_i915_private *dev_priv,
 	struct ilk_wm_values *previous = &dev_priv->wm.hw;
 	bool changed = false;
 
-	if (dirty & WM_DIRTY_LP(3) && previous->wm_lp[2] & WM1_LP_SR_EN) {
-		previous->wm_lp[2] &= ~WM1_LP_SR_EN;
+	if (dirty & WM_DIRTY_LP(3) && previous->wm_lp[2] & WM_LP_ENABLE) {
+		previous->wm_lp[2] &= ~WM_LP_ENABLE;
 		intel_uncore_write(&dev_priv->uncore, WM3_LP_ILK, previous->wm_lp[2]);
 		changed = true;
 	}
-	if (dirty & WM_DIRTY_LP(2) && previous->wm_lp[1] & WM1_LP_SR_EN) {
-		previous->wm_lp[1] &= ~WM1_LP_SR_EN;
+	if (dirty & WM_DIRTY_LP(2) && previous->wm_lp[1] & WM_LP_ENABLE) {
+		previous->wm_lp[1] &= ~WM_LP_ENABLE;
 		intel_uncore_write(&dev_priv->uncore, WM2_LP_ILK, previous->wm_lp[1]);
 		changed = true;
 	}
-	if (dirty & WM_DIRTY_LP(1) && previous->wm_lp[0] & WM1_LP_SR_EN) {
-		previous->wm_lp[0] &= ~WM1_LP_SR_EN;
+	if (dirty & WM_DIRTY_LP(1) && previous->wm_lp[0] & WM_LP_ENABLE) {
+		previous->wm_lp[0] &= ~WM_LP_ENABLE;
 		intel_uncore_write(&dev_priv->uncore, WM1_LP_ILK, previous->wm_lp[0]);
 		changed = true;
 	}
 
 	/*
-	 * Don't touch WM1S_LP_EN here.
+	 * Don't touch WM_LP_SPRITE_ENABLE here.
 	 * Doing so could cause underruns.
 	 */
 
@@ -3781,48 +3781,55 @@ intel_disable_sagv(struct drm_i915_private *dev_priv)
 	return 0;
 }
 
-void intel_sagv_pre_plane_update(struct intel_atomic_state *state)
+static void skl_sagv_pre_plane_update(struct intel_atomic_state *state)
 {
-	struct drm_i915_private *dev_priv = to_i915(state->base.dev);
-	const struct intel_bw_state *new_bw_state;
-	const struct intel_bw_state *old_bw_state;
-	u32 new_mask = 0;
+	struct drm_i915_private *i915 = to_i915(state->base.dev);
+	const struct intel_bw_state *new_bw_state =
+		intel_atomic_get_new_bw_state(state);
 
-	/*
-	 * Just return if we can't control SAGV or don't have it.
-	 * This is different from situation when we have SAGV but just can't
-	 * afford it due to DBuf limitation - in case if SAGV is completely
-	 * disabled in a BIOS, we are not even allowed to send a PCode request,
-	 * as it will throw an error. So have to check it here.
-	 */
-	if (!intel_has_sagv(dev_priv))
+	if (!new_bw_state)
 		return;
 
-	new_bw_state = intel_atomic_get_new_bw_state(state);
+	if (!intel_can_enable_sagv(i915, new_bw_state))
+		intel_disable_sagv(i915);
+}
+
+static void skl_sagv_post_plane_update(struct intel_atomic_state *state)
+{
+	struct drm_i915_private *i915 = to_i915(state->base.dev);
+	const struct intel_bw_state *new_bw_state =
+		intel_atomic_get_new_bw_state(state);
+
 	if (!new_bw_state)
 		return;
 
-	if (DISPLAY_VER(dev_priv) < 11 && !intel_can_enable_sagv(dev_priv, new_bw_state)) {
-		intel_disable_sagv(dev_priv);
-		return;
-	}
+	if (intel_can_enable_sagv(i915, new_bw_state))
+		intel_enable_sagv(i915);
+}
 
-	old_bw_state = intel_atomic_get_old_bw_state(state);
-	/*
-	 * Nothing to mask
-	 */
-	if (new_bw_state->qgv_points_mask == old_bw_state->qgv_points_mask)
+static void icl_sagv_pre_plane_update(struct intel_atomic_state *state)
+{
+	struct drm_i915_private *dev_priv = to_i915(state->base.dev);
+	const struct intel_bw_state *old_bw_state =
+		intel_atomic_get_old_bw_state(state);
+	const struct intel_bw_state *new_bw_state =
+		intel_atomic_get_new_bw_state(state);
+	u16 old_mask, new_mask;
+
+	if (!new_bw_state)
 		return;
 
+	old_mask = old_bw_state->qgv_points_mask;
 	new_mask = old_bw_state->qgv_points_mask | new_bw_state->qgv_points_mask;
 
-	/*
-	 * If new mask is zero - means there is nothing to mask,
-	 * we can only unmask, which should be done in unmask.
-	 */
-	if (!new_mask)
+	if (old_mask == new_mask)
 		return;
 
+	WARN_ON(!new_bw_state->base.changed);
+
+	drm_dbg_kms(&dev_priv->drm, "Restricting QGV points: 0x%x -> 0x%x\n",
+		    old_mask, new_mask);
+
 	/*
 	 * Restrict required qgv points before updating the configuration.
 	 * According to BSpec we can't mask and unmask qgv points at the same
@@ -3832,12 +3839,41 @@ void intel_sagv_pre_plane_update(struct intel_atomic_state *state)
 	icl_pcode_restrict_qgv_points(dev_priv, new_mask);
 }
 
-void intel_sagv_post_plane_update(struct intel_atomic_state *state)
+static void icl_sagv_post_plane_update(struct intel_atomic_state *state)
 {
 	struct drm_i915_private *dev_priv = to_i915(state->base.dev);
-	const struct intel_bw_state *new_bw_state;
-	const struct intel_bw_state *old_bw_state;
-	u32 new_mask = 0;
+	const struct intel_bw_state *old_bw_state =
+		intel_atomic_get_old_bw_state(state);
+	const struct intel_bw_state *new_bw_state =
+		intel_atomic_get_new_bw_state(state);
+	u16 old_mask, new_mask;
+
+	if (!new_bw_state)
+		return;
+
+	old_mask = old_bw_state->qgv_points_mask | new_bw_state->qgv_points_mask;
+	new_mask = new_bw_state->qgv_points_mask;
+
+	if (old_mask == new_mask)
+		return;
+
+	WARN_ON(!new_bw_state->base.changed);
+
+	drm_dbg_kms(&dev_priv->drm, "Relaxing QGV points: 0x%x -> 0x%x\n",
+		    old_mask, new_mask);
+
+	/*
+	 * Allow required qgv points after updating the configuration.
+	 * According to BSpec we can't mask and unmask qgv points at the same
+	 * time. Also masking should be done before updating the configuration
+	 * and unmasking afterwards.
+	 */
+	icl_pcode_restrict_qgv_points(dev_priv, new_mask);
+}
+
+void intel_sagv_pre_plane_update(struct intel_atomic_state *state)
+{
+	struct drm_i915_private *i915 = to_i915(state->base.dev);
 
 	/*
 	 * Just return if we can't control SAGV or don't have it.
@@ -3846,34 +3882,33 @@ void intel_sagv_post_plane_update(struct intel_atomic_state *state)
 	 * disabled in a BIOS, we are not even allowed to send a PCode request,
 	 * as it will throw an error. So have to check it here.
 	 */
-	if (!intel_has_sagv(dev_priv))
+	if (!intel_has_sagv(i915))
 		return;
 
-	new_bw_state = intel_atomic_get_new_bw_state(state);
-	if (!new_bw_state)
-		return;
+	if (DISPLAY_VER(i915) >= 11)
+		icl_sagv_pre_plane_update(state);
+	else
+		skl_sagv_pre_plane_update(state);
+}
 
-	if (DISPLAY_VER(dev_priv) < 11 && intel_can_enable_sagv(dev_priv, new_bw_state)) {
-		intel_enable_sagv(dev_priv);
-		return;
-	}
+void intel_sagv_post_plane_update(struct intel_atomic_state *state)
+{
+	struct drm_i915_private *i915 = to_i915(state->base.dev);
 
-	old_bw_state = intel_atomic_get_old_bw_state(state);
 	/*
-	 * Nothing to unmask
+	 * Just return if we can't control SAGV or don't have it.
+	 * This is different from the situation where we have SAGV but just
+	 * can't afford it due to a DBuf limitation - if SAGV is completely
+	 * disabled in the BIOS, we are not even allowed to send a PCode
+	 * request, as it will throw an error. So we have to check it here.
 	 */
-	if (new_bw_state->qgv_points_mask == old_bw_state->qgv_points_mask)
+	if (!intel_has_sagv(i915))
 		return;
 
-	new_mask = new_bw_state->qgv_points_mask;
-
-	/*
-	 * Allow required qgv points after updating the configuration.
-	 * According to BSpec we can't mask and unmask qgv points at the same
-	 * time. Also masking should be done before updating the configuration
-	 * and unmasking afterwards.
-	 */
-	icl_pcode_restrict_qgv_points(dev_priv, new_mask);
+	if (DISPLAY_VER(i915) >= 11)
+		icl_sagv_post_plane_update(state);
+	else
+		skl_sagv_post_plane_update(state);
 }
 
 static bool skl_crtc_can_enable_sagv(const struct intel_crtc_state *crtc_state)
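
The split above makes the ordering constraint structural: the pre-plane-update
step can only tighten the QGV restriction (old mask ORed with new mask) and
the post-plane-update step can only relax it (new mask alone), so no single
PCode request ever masks and unmasks points at the same time. A tiny sketch of
the two transitions, with invented mask values:

#include <stdio.h>

int main(void)
{
	unsigned int old_mask = 0x3;	/* hypothetical: points 0,1 restricted */
	unsigned int new_mask = 0x6;	/* hypothetical: points 1,2 restricted */

	/* pre plane update: restrict to the union of both masks */
	printf("pre:  0x%x\n", old_mask | new_mask);	/* 0x7 */
	/* post plane update: relax to the new mask only */
	printf("post: 0x%x\n", new_mask);		/* 0x6 */
	return 0;
}
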
@@ -4005,6 +4040,17 @@ static int intel_compute_sagv_mask(struct intel_atomic_state *state)
 			return ret;
 	}
 
+	if (intel_can_enable_sagv(dev_priv, new_bw_state) !=
+	    intel_can_enable_sagv(dev_priv, old_bw_state)) {
+		ret = intel_atomic_serialize_global_state(&new_bw_state->base);
+		if (ret)
+			return ret;
+	} else if (new_bw_state->pipe_sagv_reject != old_bw_state->pipe_sagv_reject) {
+		ret = intel_atomic_lock_global_state(&new_bw_state->base);
+		if (ret)
+			return ret;
+	}
+
 	for_each_new_intel_crtc_in_state(state, crtc,
 					 new_crtc_state, i) {
 		struct skl_pipe_wm *pipe_wm = &new_crtc_state->wm.skl.optimal;
@@ -4020,20 +4066,18 @@ static int intel_compute_sagv_mask(struct intel_atomic_state *state)
 			intel_can_enable_sagv(dev_priv, new_bw_state);
 	}
 
-	if (intel_can_enable_sagv(dev_priv, new_bw_state) !=
-	    intel_can_enable_sagv(dev_priv, old_bw_state)) {
-		ret = intel_atomic_serialize_global_state(&new_bw_state->base);
-		if (ret)
-			return ret;
-	} else if (new_bw_state->pipe_sagv_reject != old_bw_state->pipe_sagv_reject) {
-		ret = intel_atomic_lock_global_state(&new_bw_state->base);
-		if (ret)
-			return ret;
-	}
-
 	return 0;
 }
 
+static u16 skl_ddb_entry_init(struct skl_ddb_entry *entry,
+			      u16 start, u16 end)
+{
+	entry->start = start;
+	entry->end = end;
+
+	return end;
+}
+
 static int intel_dbuf_slice_size(struct drm_i915_private *dev_priv)
 {
 	return INTEL_INFO(dev_priv)->dbuf.size /
@@ -4172,8 +4216,7 @@ skl_crtc_allocate_ddb(struct intel_atomic_state *state, struct intel_crtc *crtc)
 	int ret;
 
 	if (new_dbuf_state->weight[pipe] == 0) {
-		new_dbuf_state->ddb[pipe].start = 0;
-		new_dbuf_state->ddb[pipe].end = 0;
+		skl_ddb_entry_init(&new_dbuf_state->ddb[pipe], 0, 0);
 		goto out;
 	}
 
@@ -4189,8 +4232,10 @@ skl_crtc_allocate_ddb(struct intel_atomic_state *state, struct intel_crtc *crtc)
 	start = ddb_range_size * weight_start / weight_total;
 	end = ddb_range_size * weight_end / weight_total;
 
-	new_dbuf_state->ddb[pipe].start = ddb_slices.start - mbus_offset + start;
-	new_dbuf_state->ddb[pipe].end = ddb_slices.start - mbus_offset + end;
+	skl_ddb_entry_init(&new_dbuf_state->ddb[pipe],
+			   ddb_slices.start - mbus_offset + start,
+			   ddb_slices.start - mbus_offset + end);
+
 out:
 	if (old_dbuf_state->slices[pipe] == new_dbuf_state->slices[pipe] &&
 	    skl_ddb_entry_equal(&old_dbuf_state->ddb[pipe],
@@ -4268,11 +4313,11 @@ skl_cursor_allocation(const struct intel_crtc_state *crtc_state,
 	return max(num_active == 1 ? 32 : 8, min_ddb_alloc);
 }
 
-static void skl_ddb_entry_init_from_hw(struct drm_i915_private *dev_priv,
-				       struct skl_ddb_entry *entry, u32 reg)
+static void skl_ddb_entry_init_from_hw(struct skl_ddb_entry *entry, u32 reg)
 {
-	entry->start = REG_FIELD_GET(PLANE_BUF_START_MASK, reg);
-	entry->end = REG_FIELD_GET(PLANE_BUF_END_MASK, reg);
+	skl_ddb_entry_init(entry,
+			   REG_FIELD_GET(PLANE_BUF_START_MASK, reg),
+			   REG_FIELD_GET(PLANE_BUF_END_MASK, reg));
 	if (entry->end)
 		entry->end++;
 }
@@ -4290,7 +4335,7 @@ skl_ddb_get_hw_plane_state(struct drm_i915_private *dev_priv,
 	/* Cursor doesn't support NV12/planar, so no extra calculation needed */
 	if (plane_id == PLANE_CURSOR) {
 		val = intel_uncore_read(&dev_priv->uncore, CUR_BUF_CFG(pipe));
-		skl_ddb_entry_init_from_hw(dev_priv, ddb_y, val);
+		skl_ddb_entry_init_from_hw(ddb_y, val);
 		return;
 	}
 
@@ -4304,7 +4349,7 @@ skl_ddb_get_hw_plane_state(struct drm_i915_private *dev_priv,
 
 	if (DISPLAY_VER(dev_priv) >= 11) {
 		val = intel_uncore_read(&dev_priv->uncore, PLANE_BUF_CFG(pipe, plane_id));
-		skl_ddb_entry_init_from_hw(dev_priv, ddb_y, val);
+		skl_ddb_entry_init_from_hw(ddb_y, val);
 	} else {
 		val = intel_uncore_read(&dev_priv->uncore, PLANE_BUF_CFG(pipe, plane_id));
 		val2 = intel_uncore_read(&dev_priv->uncore, PLANE_NV12_BUF_CFG(pipe, plane_id));
@@ -4313,8 +4358,8 @@ skl_ddb_get_hw_plane_state(struct drm_i915_private *dev_priv,
 		    drm_format_info_is_yuv_semiplanar(drm_format_info(fourcc)))
 			swap(val, val2);
 
-		skl_ddb_entry_init_from_hw(dev_priv, ddb_y, val);
-		skl_ddb_entry_init_from_hw(dev_priv, ddb_uv, val2);
+		skl_ddb_entry_init_from_hw(ddb_y, val);
+		skl_ddb_entry_init_from_hw(ddb_uv, val2);
 	}
 }
 
@@ -4342,55 +4387,6 @@ void skl_pipe_ddb_get_hw_state(struct intel_crtc *crtc,
 	intel_display_power_put(dev_priv, power_domain, wakeref);
 }
 
-/*
- * Determines the downscale amount of a plane for the purposes of watermark calculations.
- * The bspec defines downscale amount as:
- *
- * """
- * Horizontal down scale amount = maximum[1, Horizontal source size /
- *                                           Horizontal destination size]
- * Vertical down scale amount = maximum[1, Vertical source size /
- *                                         Vertical destination size]
- * Total down scale amount = Horizontal down scale amount *
- *                           Vertical down scale amount
- * """
- *
- * Return value is provided in 16.16 fixed point form to retain fractional part.
- * Caller should take care of dividing & rounding off the value.
- */
-static uint_fixed_16_16_t
-skl_plane_downscale_amount(const struct intel_crtc_state *crtc_state,
-			   const struct intel_plane_state *plane_state)
-{
-	struct drm_i915_private *dev_priv = to_i915(crtc_state->uapi.crtc->dev);
-	u32 src_w, src_h, dst_w, dst_h;
-	uint_fixed_16_16_t fp_w_ratio, fp_h_ratio;
-	uint_fixed_16_16_t downscale_h, downscale_w;
-
-	if (drm_WARN_ON(&dev_priv->drm,
-			!intel_wm_plane_visible(crtc_state, plane_state)))
-		return u32_to_fixed16(0);
-
-	/*
-	 * Src coordinates are already rotated by 270 degrees for
-	 * the 90/270 degree plane rotation cases (to match the
-	 * GTT mapping), hence no need to account for rotation here.
-	 *
-	 * n.b., src is 16.16 fixed point, dst is whole integer.
-	 */
-	src_w = drm_rect_width(&plane_state->uapi.src) >> 16;
-	src_h = drm_rect_height(&plane_state->uapi.src) >> 16;
-	dst_w = drm_rect_width(&plane_state->uapi.dst);
-	dst_h = drm_rect_height(&plane_state->uapi.dst);
-
-	fp_w_ratio = div_fixed16(src_w, dst_w);
-	fp_h_ratio = div_fixed16(src_h, dst_h);
-	downscale_w = max_fixed16(fp_w_ratio, u32_to_fixed16(1));
-	downscale_h = max_fixed16(fp_h_ratio, u32_to_fixed16(1));
-
-	return mul_fixed16(downscale_w, downscale_h);
-}
-
 struct dbuf_slice_conf_entry {
 	u8 active_pipes;
 	u8 dbuf_mask[I915_MAX_PIPES];
@@ -4831,7 +4827,7 @@ static bool check_mbus_joined(u8 active_pipes,
 {
 	int i;
 
-	for (i = 0; i < dbuf_slices[i].active_pipes; i++) {
+	for (i = 0; dbuf_slices[i].active_pipes != 0; i++) {
 		if (dbuf_slices[i].active_pipes == active_pipes)
 			return dbuf_slices[i].join_mbus;
 	}
@@ -4848,7 +4844,7 @@ static u8 compute_dbuf_slices(enum pipe pipe, u8 active_pipes, bool join_mbus,
 {
 	int i;
 
-	for (i = 0; i < dbuf_slices[i].active_pipes; i++) {
+	for (i = 0; dbuf_slices[i].active_pipes != 0; i++) {
 		if (dbuf_slices[i].active_pipes == active_pipes &&
 		    dbuf_slices[i].join_mbus == join_mbus)
 			return dbuf_slices[i].dbuf_mask[pipe];
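
Both fixed loops walk tables whose final entry is zero-initialized, so the
corrected condition stops at that sentinel instead of comparing the loop index
against a field of the current entry. A self-contained sketch of the same
sentinel-terminated lookup, with an invented table:

#include <stdio.h>

struct entry {
	unsigned char active_pipes;
	unsigned char dbuf_mask;
};

static const struct entry table[] = {
	{ .active_pipes = 0x1, .dbuf_mask = 0x1 },
	{ .active_pipes = 0x3, .dbuf_mask = 0x3 },
	{ 0 }	/* zero sentinel terminates the walk */
};

static unsigned char lookup(unsigned char active_pipes)
{
	int i;

	for (i = 0; table[i].active_pipes != 0; i++) {
		if (table[i].active_pipes == active_pipes)
			return table[i].dbuf_mask;
	}
	return 0;
}

int main(void)
{
	printf("mask=0x%x\n", lookup(0x3));	/* prints mask=0x3 */
	return 0;
}
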
@@ -4946,10 +4942,7 @@ skl_plane_relative_data_rate(const struct intel_crtc_state *crtc_state,
 {
 	struct intel_plane *plane = to_intel_plane(plane_state->uapi.plane);
 	const struct drm_framebuffer *fb = plane_state->hw.fb;
-	u32 data_rate;
-	u32 width = 0, height = 0;
-	uint_fixed_16_16_t down_scale_amount;
-	u64 rate;
+	int width, height;
 
 	if (!plane_state->uapi.visible)
 		return 0;
@@ -4983,14 +4976,7 @@ skl_plane_relative_data_rate(const struct intel_crtc_state *crtc_state,
 		height /= 2;
 	}
 
-	data_rate = width * height;
-
-	down_scale_amount = skl_plane_downscale_amount(crtc_state, plane_state);
-
-	rate = mul_round_up_u32_fixed16(data_rate, down_scale_amount);
-
-	rate *= fb->format->cpp[color_plane];
-	return rate;
+	return width * height * fb->format->cpp[color_plane];
 }
 
 static u64
@@ -5147,9 +5133,31 @@ static bool icl_need_wm1_wa(struct drm_i915_private *i915,
 	       (IS_DISPLAY_VER(i915, 12, 13) && plane_id == PLANE_CURSOR);
 }
 
+struct skl_plane_ddb_iter {
+	u64 data_rate;
+	u16 total[I915_MAX_PLANES];
+	u16 uv_total[I915_MAX_PLANES];
+	u16 start, size;
+};
+
+static u16
+skl_allocate_plane_ddb(struct skl_plane_ddb_iter *iter,
+		       const struct skl_wm_level *wm,
+		       u64 data_rate)
+{
+	u16 extra;
+
+	extra = min_t(u16, iter->size,
+		      DIV64_U64_ROUND_UP(iter->size * data_rate, iter->data_rate));
+	iter->size -= extra;
+	iter->data_rate -= data_rate;
+
+	return wm->min_ddb_alloc + extra;
+}
+
 static int
-skl_allocate_plane_ddb(struct intel_atomic_state *state,
-		       struct intel_crtc *crtc)
+skl_crtc_allocate_plane_ddb(struct intel_atomic_state *state,
+			    struct intel_crtc *crtc)
 {
 	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
 	struct intel_crtc_state *crtc_state =
@@ -5158,10 +5166,7 @@ skl_allocate_plane_ddb(struct intel_atomic_state *state,
 		intel_atomic_get_new_dbuf_state(state);
 	const struct skl_ddb_entry *alloc = &dbuf_state->ddb[crtc->pipe];
 	int num_active = hweight8(dbuf_state->active_pipes);
-	u16 alloc_size, start = 0;
-	u16 total[I915_MAX_PLANES] = {};
-	u16 uv_total[I915_MAX_PLANES] = {};
-	u64 total_data_rate;
+	struct skl_plane_ddb_iter iter = {};
 	enum plane_id plane_id;
 	u32 blocks;
 	int level;
@@ -5174,24 +5179,21 @@ skl_allocate_plane_ddb(struct intel_atomic_state *state,
 		return 0;
 
 	if (DISPLAY_VER(dev_priv) >= 11)
-		total_data_rate =
-			icl_get_total_relative_data_rate(state, crtc);
+		iter.data_rate = icl_get_total_relative_data_rate(state, crtc);
 	else
-		total_data_rate =
-			skl_get_total_relative_data_rate(state, crtc);
+		iter.data_rate = skl_get_total_relative_data_rate(state, crtc);
 
-	alloc_size = skl_ddb_entry_size(alloc);
-	if (alloc_size == 0)
+	iter.size = skl_ddb_entry_size(alloc);
+	if (iter.size == 0)
 		return 0;
 
 	/* Allocate fixed number of blocks for cursor. */
-	total[PLANE_CURSOR] = skl_cursor_allocation(crtc_state, num_active);
-	alloc_size -= total[PLANE_CURSOR];
-	crtc_state->wm.skl.plane_ddb_y[PLANE_CURSOR].start =
-		alloc->end - total[PLANE_CURSOR];
-	crtc_state->wm.skl.plane_ddb_y[PLANE_CURSOR].end = alloc->end;
+	iter.total[PLANE_CURSOR] = skl_cursor_allocation(crtc_state, num_active);
+	iter.size -= iter.total[PLANE_CURSOR];
+	skl_ddb_entry_init(&crtc_state->wm.skl.plane_ddb_y[PLANE_CURSOR],
+			   alloc->end - iter.total[PLANE_CURSOR], alloc->end);
 
-	if (total_data_rate == 0)
+	if (iter.data_rate == 0)
 		return 0;
 
 	/*
@@ -5205,7 +5207,7 @@ skl_allocate_plane_ddb(struct intel_atomic_state *state,
 				&crtc_state->wm.skl.optimal.planes[plane_id];
 
 			if (plane_id == PLANE_CURSOR) {
-				if (wm->wm[level].min_ddb_alloc > total[PLANE_CURSOR]) {
+				if (wm->wm[level].min_ddb_alloc > iter.total[PLANE_CURSOR]) {
 					drm_WARN_ON(&dev_priv->drm,
 						    wm->wm[level].min_ddb_alloc != U16_MAX);
 					blocks = U32_MAX;
@@ -5218,8 +5220,8 @@ skl_allocate_plane_ddb(struct intel_atomic_state *state,
 			blocks += wm->uv_wm[level].min_ddb_alloc;
 		}
 
-		if (blocks <= alloc_size) {
-			alloc_size -= blocks;
+		if (blocks <= iter.size) {
+			iter.size -= blocks;
 			break;
 		}
 	}
@@ -5228,7 +5230,7 @@ skl_allocate_plane_ddb(struct intel_atomic_state *state,
 		drm_dbg_kms(&dev_priv->drm,
 			    "Requested display configuration exceeds system DDB limitations");
 		drm_dbg_kms(&dev_priv->drm, "minimum required %d/%d\n",
-			    blocks, alloc_size);
+			    blocks, iter.size);
 		return -EINVAL;
 	}
 
@@ -5240,8 +5242,6 @@ skl_allocate_plane_ddb(struct intel_atomic_state *state,
 	for_each_plane_id_on_crtc(crtc, plane_id) {
 		const struct skl_plane_wm *wm =
 			&crtc_state->wm.skl.optimal.planes[plane_id];
-		u64 rate;
-		u16 extra;
 
 		if (plane_id == PLANE_CURSOR)
 			continue;
@@ -5250,32 +5250,24 @@ skl_allocate_plane_ddb(struct intel_atomic_state *state,
 		 * We've accounted for all active planes; remaining planes are
 		 * all disabled.
 		 */
-		if (total_data_rate == 0)
+		if (iter.data_rate == 0)
 			break;
 
-		rate = crtc_state->plane_data_rate[plane_id];
-		extra = min_t(u16, alloc_size,
-			      DIV64_U64_ROUND_UP(alloc_size * rate,
-						 total_data_rate));
-		total[plane_id] = wm->wm[level].min_ddb_alloc + extra;
-		alloc_size -= extra;
-		total_data_rate -= rate;
+		iter.total[plane_id] =
+			skl_allocate_plane_ddb(&iter, &wm->wm[level],
+					       crtc_state->plane_data_rate[plane_id]);
 
-		if (total_data_rate == 0)
+		if (iter.data_rate == 0)
 			break;
 
-		rate = crtc_state->uv_plane_data_rate[plane_id];
-		extra = min_t(u16, alloc_size,
-			      DIV64_U64_ROUND_UP(alloc_size * rate,
-						 total_data_rate));
-		uv_total[plane_id] = wm->uv_wm[level].min_ddb_alloc + extra;
-		alloc_size -= extra;
-		total_data_rate -= rate;
+		iter.uv_total[plane_id] =
+			skl_allocate_plane_ddb(&iter, &wm->uv_wm[level],
+					       crtc_state->uv_plane_data_rate[plane_id]);
 	}
-	drm_WARN_ON(&dev_priv->drm, alloc_size != 0 || total_data_rate != 0);
+	drm_WARN_ON(&dev_priv->drm, iter.size != 0 || iter.data_rate != 0);
 
 	/* Set the actual DDB start/end points for each plane */
-	start = alloc->start;
+	iter.start = alloc->start;
 	for_each_plane_id_on_crtc(crtc, plane_id) {
 		struct skl_ddb_entry *plane_alloc =
 			&crtc_state->wm.skl.plane_ddb_y[plane_id];
@@ -5287,20 +5279,16 @@ skl_allocate_plane_ddb(struct intel_atomic_state *state,
 
 		/* Gen11+ uses a separate plane for UV watermarks */
 		drm_WARN_ON(&dev_priv->drm,
-			    DISPLAY_VER(dev_priv) >= 11 && uv_total[plane_id]);
+			    DISPLAY_VER(dev_priv) >= 11 && iter.uv_total[plane_id]);
 
 		/* Leave disabled planes at (0,0) */
-		if (total[plane_id]) {
-			plane_alloc->start = start;
-			start += total[plane_id];
-			plane_alloc->end = start;
-		}
+		if (iter.total[plane_id])
+			iter.start = skl_ddb_entry_init(plane_alloc, iter.start,
+							iter.start + iter.total[plane_id]);
 
-		if (uv_total[plane_id]) {
-			uv_plane_alloc->start = start;
-			start += uv_total[plane_id];
-			uv_plane_alloc->end = start;
-		}
+		if (iter.uv_total[plane_id])
+			iter.start = skl_ddb_entry_init(uv_plane_alloc, iter.start,
+							iter.start + iter.uv_total[plane_id]);
 	}
 
 	/*
@@ -5315,7 +5303,8 @@ skl_allocate_plane_ddb(struct intel_atomic_state *state,
 				&crtc_state->wm.skl.optimal.planes[plane_id];
 
 			skl_check_nv12_wm_level(&wm->wm[level], &wm->uv_wm[level],
-						total[plane_id], uv_total[plane_id]);
+						iter.total[plane_id],
+						iter.uv_total[plane_id]);
 
 			if (icl_need_wm1_wa(dev_priv, plane_id) &&
 			    level == 1 && wm->wm[0].enable) {
@@ -5334,9 +5323,9 @@ skl_allocate_plane_ddb(struct intel_atomic_state *state,
 		struct skl_plane_wm *wm =
 			&crtc_state->wm.skl.optimal.planes[plane_id];
 
-		skl_check_wm_level(&wm->trans_wm, total[plane_id]);
-		skl_check_wm_level(&wm->sagv.wm0, total[plane_id]);
-		skl_check_wm_level(&wm->sagv.trans_wm, total[plane_id]);
+		skl_check_wm_level(&wm->trans_wm, iter.total[plane_id]);
+		skl_check_wm_level(&wm->sagv.wm0, iter.total[plane_id]);
+		skl_check_wm_level(&wm->sagv.trans_wm, iter.total[plane_id]);
 	}
 
 	return 0;
@@ -6226,7 +6215,7 @@ skl_compute_ddb(struct intel_atomic_state *state)
 
 	for_each_oldnew_intel_crtc_in_state(state, crtc, old_crtc_state,
 					    new_crtc_state, i) {
-		ret = skl_allocate_plane_ddb(state, crtc);
+		ret = skl_crtc_allocate_plane_ddb(state, crtc);
 		if (ret)
 			return ret;
 
@@ -6803,9 +6792,9 @@ static void ilk_pipe_wm_get_hw_state(struct intel_crtc *crtc)
 		 * multiple pipes are active.
 		 */
 		active->wm[0].enable = true;
-		active->wm[0].pri_val = (tmp & WM0_PIPE_PLANE_MASK) >> WM0_PIPE_PLANE_SHIFT;
-		active->wm[0].spr_val = (tmp & WM0_PIPE_SPRITE_MASK) >> WM0_PIPE_SPRITE_SHIFT;
-		active->wm[0].cur_val = tmp & WM0_PIPE_CURSOR_MASK;
+		active->wm[0].pri_val = REG_FIELD_GET(WM0_PIPE_PRIMARY_MASK, tmp);
+		active->wm[0].spr_val = REG_FIELD_GET(WM0_PIPE_SPRITE_MASK, tmp);
+		active->wm[0].cur_val = REG_FIELD_GET(WM0_PIPE_CURSOR_MASK, tmp);
 	} else {
 		int level, max_level = ilk_wm_max_level(dev_priv);
 
@@ -7229,12 +7218,12 @@ void vlv_wm_sanitize(struct drm_i915_private *dev_priv)
  */
 static void ilk_init_lp_watermarks(struct drm_i915_private *dev_priv)
 {
-	intel_uncore_write(&dev_priv->uncore, WM3_LP_ILK, intel_uncore_read(&dev_priv->uncore, WM3_LP_ILK) & ~WM1_LP_SR_EN);
-	intel_uncore_write(&dev_priv->uncore, WM2_LP_ILK, intel_uncore_read(&dev_priv->uncore, WM2_LP_ILK) & ~WM1_LP_SR_EN);
-	intel_uncore_write(&dev_priv->uncore, WM1_LP_ILK, intel_uncore_read(&dev_priv->uncore, WM1_LP_ILK) & ~WM1_LP_SR_EN);
+	intel_uncore_write(&dev_priv->uncore, WM3_LP_ILK, intel_uncore_read(&dev_priv->uncore, WM3_LP_ILK) & ~WM_LP_ENABLE);
+	intel_uncore_write(&dev_priv->uncore, WM2_LP_ILK, intel_uncore_read(&dev_priv->uncore, WM2_LP_ILK) & ~WM_LP_ENABLE);
+	intel_uncore_write(&dev_priv->uncore, WM1_LP_ILK, intel_uncore_read(&dev_priv->uncore, WM1_LP_ILK) & ~WM_LP_ENABLE);
 
 	/*
-	 * Don't touch WM1S_LP_EN here.
+	 * Don't touch WM_LP_SPRITE_ENABLE here.
 	 * Doing so could cause underruns.
 	 */
 }
@@ -7437,7 +7426,7 @@ static void gen6_check_mch_setup(struct drm_i915_private *dev_priv)
 	u32 tmp;
 
 	tmp = intel_uncore_read(&dev_priv->uncore, MCH_SSKPD);
-	if ((tmp & MCH_SSKPD_WM0_MASK) != MCH_SSKPD_WM0_VAL)
+	if (REG_FIELD_GET(SSKPD_WM0_MASK_SNB, tmp) != 12)
 		drm_dbg_kms(&dev_priv->drm,
 			    "Wrong MCH_SSKPD value: 0x%08x This can cause underruns.\n",
 			    tmp);
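
Within the intel_pm.c changes above, skl_allocate_plane_ddb() hands each plane
its minimum allocation plus a share of the leftover DDB proportional to its
remaining data rate; because iter.size and iter.data_rate both shrink as each
plane is processed, the rounded-up shares still sum to exactly the leftover
space. A worked user-space sketch of that running proportional split, with
invented sizes and rates:

#include <stdio.h>

/* Take one plane's share of 'size' leftover blocks, proportional to rate. */
static unsigned int take_share(unsigned int *size,
			       unsigned long long *data_rate,
			       unsigned long long rate)
{
	unsigned long long extra = (*size * rate + *data_rate - 1) / *data_rate;

	if (extra > *size)
		extra = *size;
	*size -= extra;
	*data_rate -= rate;
	return (unsigned int)extra;
}

int main(void)
{
	unsigned int size = 100;			/* leftover DDB blocks */
	unsigned long long data_rate = 60;		/* sum of plane rates */
	unsigned long long rates[] = { 30, 20, 10 };	/* per-plane rates */
	int i;

	/* prints 50, 34 and 16: the shares sum to all 100 blocks */
	for (i = 0; i < 3; i++)
		printf("plane %d extra = %u\n", i,
		       take_share(&size, &data_rate, rates[i]));
	return 0;
}
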
diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c
index 3293ac71bcf865bb0ede9db4eb77e3c1e1dda845..6ed5786bcd299cec96a66025f9b924626ed4d172 100644
--- a/drivers/gpu/drm/i915/intel_runtime_pm.c
+++ b/drivers/gpu/drm/i915/intel_runtime_pm.c
@@ -77,6 +77,9 @@ track_intel_runtime_pm_wakeref(struct intel_runtime_pm *rpm)
 	depot_stack_handle_t stack, *stacks;
 	unsigned long flags;
 
+	if (rpm->no_wakeref_tracking)
+		return -1;
+
 	stack = __save_depot_stack();
 	if (!stack)
 		return -1;
diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.h b/drivers/gpu/drm/i915/intel_runtime_pm.h
index 47a85fab4130787067575ccbd2c7c2d374644083..d9160e3ff4afcf99adb8da12c58f0b1779da252b 100644
--- a/drivers/gpu/drm/i915/intel_runtime_pm.h
+++ b/drivers/gpu/drm/i915/intel_runtime_pm.h
@@ -51,6 +51,7 @@ struct intel_runtime_pm {
 	bool available;
 	bool suspended;
 	bool irqs_enabled;
+	bool no_wakeref_tracking;
 
 #if IS_ENABLED(CONFIG_DRM_I915_DEBUG_RUNTIME_PM)
 	/*
diff --git a/drivers/gpu/drm/i915/intel_step.c b/drivers/gpu/drm/i915/intel_step.c
index a4b16b9e2e55dda33ff0e7ca98fc9aad04523e8c..ac1a796b280846fabe5c112b8d3af201af938e8f 100644
--- a/drivers/gpu/drm/i915/intel_step.c
+++ b/drivers/gpu/drm/i915/intel_step.c
@@ -122,6 +122,15 @@ static const struct intel_step_info dg2_g11_revid_step_tbl[] = {
 	[0x5] = { COMMON_GT_MEDIA_STEP(B1), .display_step = STEP_C0 },
 };
 
+static const struct intel_step_info dg2_g12_revid_step_tbl[] = {
+	[0x0] = { COMMON_GT_MEDIA_STEP(A0), .display_step = STEP_C0 },
+};
+
+static const struct intel_step_info adls_rpls_revids[] = {
+	[0x4] = { COMMON_GT_MEDIA_STEP(D0), .display_step = STEP_D0 },
+	[0xC] = { COMMON_GT_MEDIA_STEP(D0), .display_step = STEP_C0 },
+};
+
 void intel_step_init(struct drm_i915_private *i915)
 {
 	const struct intel_step_info *revids = NULL;
@@ -135,12 +144,18 @@ void intel_step_init(struct drm_i915_private *i915)
 	} else if (IS_DG2_G11(i915)) {
 		revids = dg2_g11_revid_step_tbl;
 		size = ARRAY_SIZE(dg2_g11_revid_step_tbl);
+	} else if (IS_DG2_G12(i915)) {
+		revids = dg2_g12_revid_step_tbl;
+		size = ARRAY_SIZE(dg2_g12_revid_step_tbl);
 	} else if (IS_XEHPSDV(i915)) {
 		revids = xehpsdv_revids;
 		size = ARRAY_SIZE(xehpsdv_revids);
 	} else if (IS_ALDERLAKE_P(i915)) {
 		revids = adlp_revids;
 		size = ARRAY_SIZE(adlp_revids);
+	} else if (IS_ADLS_RPLS(i915)) {
+		revids = adls_rpls_revids;
+		size = ARRAY_SIZE(adls_rpls_revids);
 	} else if (IS_ALDERLAKE_S(i915)) {
 		revids = adls_revids;
 		size = ARRAY_SIZE(adls_revids);
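
The stepping tables above are sparse arrays indexed directly by PCI revision
ID, so designated initializers leave any unlisted revision zero-filled. A
minimal sketch of that lookup shape -- the unknown-revid handling here is
deliberately simplified and is not the driver's exact fallback policy:

#include <stdio.h>

enum step { STEP_NONE = 0, STEP_C0, STEP_D0 };

struct step_info {
	enum step gt_step;
	enum step display_step;
};

/* Hypothetical table in the same shape as adls_rpls_revids above. */
static const struct step_info revids[] = {
	[0x4] = { .gt_step = STEP_D0, .display_step = STEP_D0 },
	[0xC] = { .gt_step = STEP_D0, .display_step = STEP_C0 },
};

int main(void)
{
	unsigned int revid = 0xC;
	unsigned int size = sizeof(revids) / sizeof(revids[0]);

	if (revid < size && revids[revid].gt_step != STEP_NONE)
		printf("display step = %d\n", revids[revid].display_step);
	else
		printf("unknown revid 0x%x\n", revid);	/* gap in the table */
	return 0;
}
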
diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c
index 850ebfae31af0088f6650319b37cc5cd4168bc5b..dd8fdd5863de9ff254710d76d59d1036b10e4763 100644
--- a/drivers/gpu/drm/i915/intel_uncore.c
+++ b/drivers/gpu/drm/i915/intel_uncore.c
@@ -1496,7 +1496,7 @@ ilk_dummy_write(struct intel_uncore *uncore)
 	/* WaIssueDummyWriteToWakeupFromRC6:ilk Issue a dummy write to wake up
 	 * the chip from rc6 before touching it for real. MI_MODE is masked,
 	 * hence harmless to write 0 into. */
-	__raw_uncore_write32(uncore, MI_MODE, 0);
+	__raw_uncore_write32(uncore, RING_MI_MODE(RENDER_RING_BASE), 0);
 }
 
 static void
diff --git a/drivers/gpu/drm/i915/intel_wopcm.c b/drivers/gpu/drm/i915/intel_wopcm.c
index f06d2100510651e5668bb868fddea2f011353c86..322fb9eeb8802a19bac3680f46e6c21330aa8ee4 100644
--- a/drivers/gpu/drm/i915/intel_wopcm.c
+++ b/drivers/gpu/drm/i915/intel_wopcm.c
@@ -43,6 +43,7 @@
 /* Default WOPCM size is 2MB from Gen11, 1MB on previous platforms */
 #define GEN11_WOPCM_SIZE		SZ_2M
 #define GEN9_WOPCM_SIZE			SZ_1M
+#define MAX_WOPCM_SIZE			SZ_8M
 /* 16KB WOPCM (RSVD WOPCM) is reserved from HuC firmware top. */
 #define WOPCM_RESERVED_SIZE		SZ_16K
 
@@ -207,6 +208,14 @@ static bool __wopcm_regs_locked(struct intel_uncore *uncore,
 	return true;
 }
 
+static bool __wopcm_regs_writable(struct intel_uncore *uncore)
+{
+	if (!HAS_GUC_DEPRIVILEGE(uncore->i915))
+		return true;
+
+	return intel_uncore_read(uncore, GUC_SHIM_CONTROL2) & GUC_IS_PRIVILEGED;
+}
+
 /**
  * intel_wopcm_init() - Initialize the WOPCM structure.
  * @wopcm: pointer to intel_wopcm.
@@ -224,18 +233,19 @@ void intel_wopcm_init(struct intel_wopcm *wopcm)
 	u32 guc_fw_size = intel_uc_fw_get_upload_size(&gt->uc.guc.fw);
 	u32 huc_fw_size = intel_uc_fw_get_upload_size(&gt->uc.huc.fw);
 	u32 ctx_rsvd = context_reserved_size(i915);
+	u32 wopcm_size = wopcm->size;
 	u32 guc_wopcm_base;
 	u32 guc_wopcm_size;
 
 	if (!guc_fw_size)
 		return;
 
-	GEM_BUG_ON(!wopcm->size);
+	GEM_BUG_ON(!wopcm_size);
 	GEM_BUG_ON(wopcm->guc.base);
 	GEM_BUG_ON(wopcm->guc.size);
-	GEM_BUG_ON(guc_fw_size >= wopcm->size);
-	GEM_BUG_ON(huc_fw_size >= wopcm->size);
-	GEM_BUG_ON(ctx_rsvd + WOPCM_RESERVED_SIZE >= wopcm->size);
+	GEM_BUG_ON(guc_fw_size >= wopcm_size);
+	GEM_BUG_ON(huc_fw_size >= wopcm_size);
+	GEM_BUG_ON(ctx_rsvd + WOPCM_RESERVED_SIZE >= wopcm_size);
 
 	if (i915_inject_probe_failure(i915))
 		return;
@@ -243,6 +253,24 @@ void intel_wopcm_init(struct intel_wopcm *wopcm)
 	if (__wopcm_regs_locked(gt->uncore, &guc_wopcm_base, &guc_wopcm_size)) {
 		drm_dbg(&i915->drm, "GuC WOPCM is already locked [%uK, %uK)\n",
 			guc_wopcm_base / SZ_1K, guc_wopcm_size / SZ_1K);
+		/*
+		 * Note that to keep things simple (i.e. avoid different
+		 * defines per platform) our WOPCM math doesn't always use the
+		 * actual WOPCM size, but a value that is less than or equal to it.
+		 * This is perfectly fine when i915 programs the registers, but
+		 * on platforms with GuC deprivilege the registers are not
+		 * writable from i915 and are instead pre-programmed by the
+		 * bios/IFWI, so there might be a mismatch of sizes.
+		 * Instead of handling the size difference, we trust that the
+		 * programmed values make sense and disable the relevant check
+		 * by using the maximum possible WOPCM size in the verification
+		 * math. In the extremely unlikely case that the registers
+		 * were pre-programmed with an invalid value, we will still
+		 * gracefully fail later during the GuC/HuC dma.
+		 */
+		if (!__wopcm_regs_writable(gt->uncore))
+			wopcm_size = MAX_WOPCM_SIZE;
+
 		goto check;
 	}
 
@@ -257,17 +285,17 @@ void intel_wopcm_init(struct intel_wopcm *wopcm)
 	 * Need to clamp guc_wopcm_base now to make sure the following math is
 	 * correct. Formal check of whole WOPCM layout will be done below.
 	 */
-	guc_wopcm_base = min(guc_wopcm_base, wopcm->size - ctx_rsvd);
+	guc_wopcm_base = min(guc_wopcm_base, wopcm_size - ctx_rsvd);
 
 	/* Aligned remainder of usable WOPCM space can be assigned to GuC. */
-	guc_wopcm_size = wopcm->size - ctx_rsvd - guc_wopcm_base;
+	guc_wopcm_size = wopcm_size - ctx_rsvd - guc_wopcm_base;
 	guc_wopcm_size &= GUC_WOPCM_SIZE_MASK;
 
 	drm_dbg(&i915->drm, "Calculated GuC WOPCM [%uK, %uK)\n",
 		guc_wopcm_base / SZ_1K, guc_wopcm_size / SZ_1K);
 
 check:
-	if (__check_layout(i915, wopcm->size, guc_wopcm_base, guc_wopcm_size,
+	if (__check_layout(i915, wopcm_size, guc_wopcm_base, guc_wopcm_size,
 			   guc_fw_size, huc_fw_size)) {
 		wopcm->guc.base = guc_wopcm_base;
 		wopcm->guc.size = guc_wopcm_size;
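
When the WOPCM registers are still writable, the partitioning above keeps the
context reservation at the top of WOPCM and gives GuC the aligned remainder
between its base and that reservation. A small sketch of the same arithmetic
with invented sizes (simple 4K rounding stands in for GUC_WOPCM_SIZE_MASK):

#include <stdio.h>

#define SZ_1K		1024u
#define SZ_1M		(1024u * SZ_1K)

int main(void)
{
	unsigned int wopcm_size = 2 * SZ_1M;	/* GEN11_WOPCM_SIZE */
	unsigned int ctx_rsvd = 128 * SZ_1K;	/* invented context reserve */
	unsigned int guc_base = 512 * SZ_1K;	/* invented GuC base */
	unsigned int guc_size;

	/* Clamp the base so the subtraction below cannot underflow. */
	if (guc_base > wopcm_size - ctx_rsvd)
		guc_base = wopcm_size - ctx_rsvd;

	/* GuC gets everything between its base and the context reserve. */
	guc_size = wopcm_size - ctx_rsvd - guc_base;
	guc_size &= ~(4 * SZ_1K - 1);	/* stand-in for GUC_WOPCM_SIZE_MASK */

	printf("GuC WOPCM [%uK, %uK)\n", guc_base / SZ_1K, guc_size / SZ_1K);
	return 0;
}
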
diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_pm.h b/drivers/gpu/drm/i915/pxp/intel_pxp_pm.h
index 16990a3f2f8518a927746013ce9678a545444767..586be769104f1540beb7a8f88559d38e2bfe509c 100644
--- a/drivers/gpu/drm/i915/pxp/intel_pxp_pm.h
+++ b/drivers/gpu/drm/i915/pxp/intel_pxp_pm.h
@@ -6,7 +6,7 @@
 #ifndef __INTEL_PXP_PM_H__
 #define __INTEL_PXP_PM_H__
 
-#include "intel_pxp_types.h"
+struct intel_pxp;
 
 #ifdef CONFIG_DRM_I915_PXP
 void intel_pxp_suspend_prepare(struct intel_pxp *pxp);
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem.c b/drivers/gpu/drm/i915/selftests/i915_gem.c
index b5576888cd78e3fcd87ea1de5551398253eaadcd..e5dd82e7e480f0d210430e2b3e6d490f1af96940 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem.c
@@ -6,9 +6,10 @@
 
 #include <linux/random.h>
 
+#include "gem/i915_gem_internal.h"
+#include "gem/i915_gem_pm.h"
 #include "gem/selftests/igt_gem_utils.h"
 #include "gem/selftests/mock_context.h"
-#include "gem/i915_gem_pm.h"
 #include "gt/intel_gt.h"
 #include "gt/intel_gt_pm.h"
 
@@ -41,7 +42,7 @@ static int switch_to_context(struct i915_gem_context *ctx)
 
 static void trash_stolen(struct drm_i915_private *i915)
 {
-	struct i915_ggtt *ggtt = &i915->ggtt;
+	struct i915_ggtt *ggtt = to_gt(i915)->ggtt;
 	const u64 slot = ggtt->error_capture.start;
 	const resource_size_t size = resource_size(&i915->dsm);
 	unsigned long page;
@@ -99,7 +100,7 @@ static void igt_pm_suspend(struct drm_i915_private *i915)
 	intel_wakeref_t wakeref;
 
 	with_intel_runtime_pm(&i915->runtime_pm, wakeref) {
-		i915_ggtt_suspend(&i915->ggtt);
+		i915_ggtt_suspend(to_gt(i915)->ggtt);
 		i915_gem_suspend_late(i915);
 	}
 }
@@ -109,7 +110,7 @@ static void igt_pm_hibernate(struct drm_i915_private *i915)
 	intel_wakeref_t wakeref;
 
 	with_intel_runtime_pm(&i915->runtime_pm, wakeref) {
-		i915_ggtt_suspend(&i915->ggtt);
+		i915_ggtt_suspend(to_gt(i915)->ggtt);
 
 		i915_gem_freeze(i915);
 		i915_gem_freeze_late(i915);
@@ -125,7 +126,7 @@ static void igt_pm_resume(struct drm_i915_private *i915)
 	 * that runtime-pm just works.
 	 */
 	with_intel_runtime_pm(&i915->runtime_pm, wakeref) {
-		i915_ggtt_resume(&i915->ggtt);
+		i915_ggtt_resume(to_gt(i915)->ggtt);
 		i915_gem_resume(i915);
 	}
 }
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
index 75b709c26dd3c7c1803275e9f22ebb7c9aba88a5..8c6517d29b8e0c409626b9d3694995cd191b151e 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c
@@ -22,6 +22,7 @@
  *
  */
 
+#include "gem/i915_gem_internal.h"
 #include "gem/i915_gem_pm.h"
 #include "gem/selftests/igt_gem_utils.h"
 #include "gem/selftests/mock_context.h"
@@ -117,7 +118,7 @@ static int igt_evict_something(void *arg)
 
 	/* Everything is pinned, nothing should happen */
 	mutex_lock(&ggtt->vm.mutex);
-	err = i915_gem_evict_something(&ggtt->vm,
+	err = i915_gem_evict_something(&ggtt->vm, NULL,
 				       I915_GTT_PAGE_SIZE, 0, 0,
 				       0, U64_MAX,
 				       0);
@@ -132,7 +133,7 @@ static int igt_evict_something(void *arg)
 
 	/* Everything is unpinned, we should be able to evict something */
 	mutex_lock(&ggtt->vm.mutex);
-	err = i915_gem_evict_something(&ggtt->vm,
+	err = i915_gem_evict_something(&ggtt->vm, NULL,
 				       I915_GTT_PAGE_SIZE, 0, 0,
 				       0, U64_MAX,
 				       0);
@@ -204,7 +205,7 @@ static int igt_evict_for_vma(void *arg)
 
 	/* Everything is pinned, nothing should happen */
 	mutex_lock(&ggtt->vm.mutex);
-	err = i915_gem_evict_for_node(&ggtt->vm, &target, 0);
+	err = i915_gem_evict_for_node(&ggtt->vm, NULL, &target, 0);
 	mutex_unlock(&ggtt->vm.mutex);
 	if (err != -ENOSPC) {
 		pr_err("i915_gem_evict_for_node on a full GGTT returned err=%d\n",
@@ -216,7 +217,7 @@ static int igt_evict_for_vma(void *arg)
 
 	/* Everything is unpinned, we should be able to evict the node */
 	mutex_lock(&ggtt->vm.mutex);
-	err = i915_gem_evict_for_node(&ggtt->vm, &target, 0);
+	err = i915_gem_evict_for_node(&ggtt->vm, NULL, &target, 0);
 	mutex_unlock(&ggtt->vm.mutex);
 	if (err) {
 		pr_err("i915_gem_evict_for_node returned err=%d\n",
@@ -297,7 +298,7 @@ static int igt_evict_for_cache_color(void *arg)
 
 	/* Remove just the second vma */
 	mutex_lock(&ggtt->vm.mutex);
-	err = i915_gem_evict_for_node(&ggtt->vm, &target, 0);
+	err = i915_gem_evict_for_node(&ggtt->vm, NULL, &target, 0);
 	mutex_unlock(&ggtt->vm.mutex);
 	if (err) {
 		pr_err("[0]i915_gem_evict_for_node returned err=%d\n", err);
@@ -310,7 +311,7 @@ static int igt_evict_for_cache_color(void *arg)
 	target.color = I915_CACHE_L3_LLC;
 
 	mutex_lock(&ggtt->vm.mutex);
-	err = i915_gem_evict_for_node(&ggtt->vm, &target, 0);
+	err = i915_gem_evict_for_node(&ggtt->vm, NULL, &target, 0);
 	mutex_unlock(&ggtt->vm.mutex);
 	if (!err) {
 		pr_err("[1]i915_gem_evict_for_node returned err=%d\n", err);
@@ -331,6 +332,7 @@ static int igt_evict_vm(void *arg)
 {
 	struct intel_gt *gt = arg;
 	struct i915_ggtt *ggtt = gt->ggtt;
+	struct i915_gem_ww_ctx ww;
 	LIST_HEAD(objects);
 	int err;
 
@@ -342,7 +344,7 @@ static int igt_evict_vm(void *arg)
 
 	/* Everything is pinned, nothing should happen */
 	mutex_lock(&ggtt->vm.mutex);
-	err = i915_gem_evict_vm(&ggtt->vm);
+	err = i915_gem_evict_vm(&ggtt->vm, NULL);
 	mutex_unlock(&ggtt->vm.mutex);
 	if (err) {
 		pr_err("i915_gem_evict_vm on a full GGTT returned err=%d]\n",
@@ -352,9 +354,12 @@ static int igt_evict_vm(void *arg)
 
 	unpin_ggtt(ggtt);
 
-	mutex_lock(&ggtt->vm.mutex);
-	err = i915_gem_evict_vm(&ggtt->vm);
-	mutex_unlock(&ggtt->vm.mutex);
+	for_i915_gem_ww(&ww, err, false) {
+		mutex_lock(&ggtt->vm.mutex);
+		err = i915_gem_evict_vm(&ggtt->vm, &ww);
+		mutex_unlock(&ggtt->vm.mutex);
+	}
+
 	if (err) {
 		pr_err("i915_gem_evict_vm on a full GGTT returned err=%d]\n",
 		       err);
@@ -402,7 +407,7 @@ static int igt_evict_contexts(void *arg)
 	/* Reserve a block so that we know we have enough to fit a few rq */
 	memset(&hole, 0, sizeof(hole));
 	mutex_lock(&ggtt->vm.mutex);
-	err = i915_gem_gtt_insert(&ggtt->vm, &hole,
+	err = i915_gem_gtt_insert(&ggtt->vm, NULL, &hole,
 				  PRETEND_GGTT_SIZE, 0, I915_COLOR_UNEVICTABLE,
 				  0, ggtt->vm.total,
 				  PIN_NOEVICT);
@@ -422,7 +427,7 @@ static int igt_evict_contexts(void *arg)
 			goto out_locked;
 		}
 
-		if (i915_gem_gtt_insert(&ggtt->vm, &r->node,
+		if (i915_gem_gtt_insert(&ggtt->vm, NULL, &r->node,
 					1ul << 20, 0, I915_COLOR_UNEVICTABLE,
 					0, ggtt->vm.total,
 					PIN_NOEVICT)) {
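
The igt_evict_vm() change above wraps the second eviction attempt in a
for_i915_gem_ww() loop, whose purpose is that a body failing with -EDEADLK is
backed off and retried instead of propagating the error. A generic user-space
sketch of just that control flow (the ww locking itself is elided):

#include <errno.h>
#include <stdio.h>

static int attempts;

/* Stand-in for a ww-locked body that "deadlocks" on its first try. */
static int evict_body(void)
{
	return ++attempts < 2 ? -EDEADLK : 0;
}

int main(void)
{
	int err;

	/* Mirrors the retry shape: a real ww context would back off here. */
	do {
		err = evict_body();
	} while (err == -EDEADLK);

	printf("done after %d attempt(s), err=%d\n", attempts, err);
	return 0;
}
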
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
index 575705c3bce9ad0373bf9c6252b76a65c4241034..e7e6c4b2c81dc8afb19ec1ee1a0ee1d75997daad 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
@@ -26,12 +26,14 @@
 #include <linux/prime_numbers.h>
 
 #include "gem/i915_gem_context.h"
+#include "gem/i915_gem_internal.h"
 #include "gem/selftests/mock_context.h"
 #include "gt/intel_context.h"
 #include "gt/intel_gpu_commands.h"
 
 #include "i915_random.h"
 #include "i915_selftest.h"
+#include "i915_vma_resource.h"
 
 #include "mock_drm.h"
 #include "mock_gem_device.h"
@@ -238,11 +240,11 @@ static int lowlevel_hole(struct i915_address_space *vm,
 			 unsigned long end_time)
 {
 	I915_RND_STATE(seed_prng);
-	struct i915_vma *mock_vma;
+	struct i915_vma_resource *mock_vma_res;
 	unsigned int size;
 
-	mock_vma = kzalloc(sizeof(*mock_vma), GFP_KERNEL);
-	if (!mock_vma)
+	mock_vma_res = kzalloc(sizeof(*mock_vma_res), GFP_KERNEL);
+	if (!mock_vma_res)
 		return -ENOMEM;
 
 	/* Keep creating larger objects until one cannot fit into the hole */
@@ -268,7 +270,7 @@ static int lowlevel_hole(struct i915_address_space *vm,
 				break;
 		} while (count >>= 1);
 		if (!count) {
-			kfree(mock_vma);
+			kfree(mock_vma_res);
 			return -ENOMEM;
 		}
 		GEM_BUG_ON(!order);
@@ -342,12 +344,12 @@ static int lowlevel_hole(struct i915_address_space *vm,
 					break;
 			}
 
-			mock_vma->pages = obj->mm.pages;
-			mock_vma->node.size = BIT_ULL(size);
-			mock_vma->node.start = addr;
+			mock_vma_res->bi.pages = obj->mm.pages;
+			mock_vma_res->node_size = BIT_ULL(size);
+			mock_vma_res->start = addr;
 
 			with_intel_runtime_pm(vm->gt->uncore->rpm, wakeref)
-				vm->insert_entries(vm, mock_vma,
+				vm->insert_entries(vm, mock_vma_res,
 						   I915_CACHE_NONE, 0);
 		}
 		count = n;
@@ -370,7 +372,7 @@ static int lowlevel_hole(struct i915_address_space *vm,
 		cleanup_freed_objects(vm->i915);
 	}
 
-	kfree(mock_vma);
+	kfree(mock_vma_res);
 	return 0;
 }
 
@@ -385,7 +387,7 @@ static void close_object_list(struct list_head *objects,
 
 		vma = i915_vma_instance(obj, vm, NULL);
 		if (!IS_ERR(vma))
-			ignored = i915_vma_unbind(vma);
+			ignored = i915_vma_unbind_unlocked(vma);
 
 		list_del(&obj->st_link);
 		i915_gem_object_put(obj);
@@ -496,7 +498,7 @@ static int fill_hole(struct i915_address_space *vm,
 						goto err;
 					}
 
-					err = i915_vma_unbind(vma);
+					err = i915_vma_unbind_unlocked(vma);
 					if (err) {
 						pr_err("%s(%s) (forward) unbind of vma.node=%llx + %llx failed with err=%d\n",
 						       __func__, p->name, vma->node.start, vma->node.size,
@@ -569,7 +571,7 @@ static int fill_hole(struct i915_address_space *vm,
 						goto err;
 					}
 
-					err = i915_vma_unbind(vma);
+					err = i915_vma_unbind_unlocked(vma);
 					if (err) {
 						pr_err("%s(%s) (backward) unbind of vma.node=%llx + %llx failed with err=%d\n",
 						       __func__, p->name, vma->node.start, vma->node.size,
@@ -655,7 +657,7 @@ static int walk_hole(struct i915_address_space *vm,
 				goto err_put;
 			}
 
-			err = i915_vma_unbind(vma);
+			err = i915_vma_unbind_unlocked(vma);
 			if (err) {
 				pr_err("%s unbind failed at %llx + %llx  with err=%d\n",
 				       __func__, addr, vma->size, err);
@@ -732,13 +734,13 @@ static int pot_hole(struct i915_address_space *vm,
 				pr_err("%s incorrect at %llx + %llx\n",
 				       __func__, addr, vma->size);
 				i915_vma_unpin(vma);
-				err = i915_vma_unbind(vma);
+				err = i915_vma_unbind_unlocked(vma);
 				err = -EINVAL;
 				goto err_obj;
 			}
 
 			i915_vma_unpin(vma);
-			err = i915_vma_unbind(vma);
+			err = i915_vma_unbind_unlocked(vma);
 			GEM_BUG_ON(err);
 		}
 
@@ -832,13 +834,13 @@ static int drunk_hole(struct i915_address_space *vm,
 				pr_err("%s incorrect at %llx + %llx\n",
 				       __func__, addr, BIT_ULL(size));
 				i915_vma_unpin(vma);
-				err = i915_vma_unbind(vma);
+				err = i915_vma_unbind_unlocked(vma);
 				err = -EINVAL;
 				goto err_obj;
 			}
 
 			i915_vma_unpin(vma);
-			err = i915_vma_unbind(vma);
+			err = i915_vma_unbind_unlocked(vma);
 			GEM_BUG_ON(err);
 
 			if (igt_timeout(end_time,
@@ -906,7 +908,7 @@ static int __shrink_hole(struct i915_address_space *vm,
 			pr_err("%s incorrect at %llx + %llx\n",
 			       __func__, addr, size);
 			i915_vma_unpin(vma);
-			err = i915_vma_unbind(vma);
+			err = i915_vma_unbind_unlocked(vma);
 			err = -EINVAL;
 			break;
 		}
@@ -1122,7 +1124,7 @@ static int exercise_ggtt(struct drm_i915_private *i915,
 				     u64 hole_start, u64 hole_end,
 				     unsigned long end_time))
 {
-	struct i915_ggtt *ggtt = &i915->ggtt;
+	struct i915_ggtt *ggtt = to_gt(i915)->ggtt;
 	u64 hole_start, hole_end, last = 0;
 	struct drm_mm_node *node;
 	IGT_TIMEOUT(end_time);
@@ -1182,7 +1184,7 @@ static int igt_ggtt_page(void *arg)
 	const unsigned int count = PAGE_SIZE/sizeof(u32);
 	I915_RND_STATE(prng);
 	struct drm_i915_private *i915 = arg;
-	struct i915_ggtt *ggtt = &i915->ggtt;
+	struct i915_ggtt *ggtt = to_gt(i915)->ggtt;
 	struct drm_i915_gem_object *obj;
 	intel_wakeref_t wakeref;
 	struct drm_mm_node tmp;
@@ -1279,6 +1281,7 @@ static void track_vma_bind(struct i915_vma *vma)
 	atomic_set(&vma->pages_count, I915_VMA_PAGES_ACTIVE);
 	__i915_gem_object_pin_pages(obj);
 	vma->pages = obj->mm.pages;
+	vma->resource->bi.pages = vma->pages;
 
 	mutex_lock(&vma->vm->mutex);
 	list_add_tail(&vma->vm_link, &vma->vm->bound_list);
@@ -1336,6 +1339,33 @@ static int igt_mock_drunk(void *arg)
 	return exercise_mock(ggtt->vm.i915, drunk_hole);
 }
 
+static int reserve_gtt_with_resource(struct i915_vma *vma, u64 offset)
+{
+	struct i915_address_space *vm = vma->vm;
+	struct i915_vma_resource *vma_res;
+	struct drm_i915_gem_object *obj = vma->obj;
+	int err;
+
+	vma_res = i915_vma_resource_alloc();
+	if (IS_ERR(vma_res))
+		return PTR_ERR(vma_res);
+
+	mutex_lock(&vm->mutex);
+	err = i915_gem_gtt_reserve(vm, NULL, &vma->node, obj->base.size,
+				   offset,
+				   obj->cache_level,
+				   0);
+	if (!err) {
+		i915_vma_resource_init_from_vma(vma_res, vma);
+		vma->resource = vma_res;
+	} else {
+		kfree(vma_res);
+	}
+	mutex_unlock(&vm->mutex);
+
+	return err;
+}
+
 static int igt_gtt_reserve(void *arg)
 {
 	struct i915_ggtt *ggtt = arg;
@@ -1370,20 +1400,13 @@ static int igt_gtt_reserve(void *arg)
 		}
 
 		list_add(&obj->st_link, &objects);
-
 		vma = i915_vma_instance(obj, &ggtt->vm, NULL);
 		if (IS_ERR(vma)) {
 			err = PTR_ERR(vma);
 			goto out;
 		}
 
-		mutex_lock(&ggtt->vm.mutex);
-		err = i915_gem_gtt_reserve(&ggtt->vm, &vma->node,
-					   obj->base.size,
-					   total,
-					   obj->cache_level,
-					   0);
-		mutex_unlock(&ggtt->vm.mutex);
+		err = reserve_gtt_with_resource(vma, total);
 		if (err) {
 			pr_err("i915_gem_gtt_reserve (pass 1) failed at %llu/%llu with err=%d\n",
 			       total, ggtt->vm.total, err);
@@ -1429,13 +1452,7 @@ static int igt_gtt_reserve(void *arg)
 			goto out;
 		}
 
-		mutex_lock(&ggtt->vm.mutex);
-		err = i915_gem_gtt_reserve(&ggtt->vm, &vma->node,
-					   obj->base.size,
-					   total,
-					   obj->cache_level,
-					   0);
-		mutex_unlock(&ggtt->vm.mutex);
+		err = reserve_gtt_with_resource(vma, total);
 		if (err) {
 			pr_err("i915_gem_gtt_reserve (pass 2) failed at %llu/%llu with err=%d\n",
 			       total, ggtt->vm.total, err);
@@ -1465,7 +1482,7 @@ static int igt_gtt_reserve(void *arg)
 			goto out;
 		}
 
-		err = i915_vma_unbind(vma);
+		err = i915_vma_unbind_unlocked(vma);
 		if (err) {
 			pr_err("i915_vma_unbind failed with err=%d!\n", err);
 			goto out;
@@ -1476,13 +1493,7 @@ static int igt_gtt_reserve(void *arg)
 					   2 * I915_GTT_PAGE_SIZE,
 					   I915_GTT_MIN_ALIGNMENT);
 
-		mutex_lock(&ggtt->vm.mutex);
-		err = i915_gem_gtt_reserve(&ggtt->vm, &vma->node,
-					   obj->base.size,
-					   offset,
-					   obj->cache_level,
-					   0);
-		mutex_unlock(&ggtt->vm.mutex);
+		err = reserve_gtt_with_resource(vma, offset);
 		if (err) {
 			pr_err("i915_gem_gtt_reserve (pass 3) failed at %llu/%llu with err=%d\n",
 			       total, ggtt->vm.total, err);
@@ -1509,6 +1520,31 @@ static int igt_gtt_reserve(void *arg)
 	return err;
 }
 
+static int insert_gtt_with_resource(struct i915_vma *vma)
+{
+	struct i915_address_space *vm = vma->vm;
+	struct i915_vma_resource *vma_res;
+	struct drm_i915_gem_object *obj = vma->obj;
+	int err;
+
+	vma_res = i915_vma_resource_alloc();
+	if (IS_ERR(vma_res))
+		return PTR_ERR(vma_res);
+
+	mutex_lock(&vm->mutex);
+	err = i915_gem_gtt_insert(vm, NULL, &vma->node, obj->base.size, 0,
+				  obj->cache_level, 0, vm->total, 0);
+	if (!err) {
+		i915_vma_resource_init_from_vma(vma_res, vma);
+		vma->resource = vma_res;
+	} else {
+		kfree(vma_res);
+	}
+	mutex_unlock(&vm->mutex);
+
+	return err;
+}
+
 static int igt_gtt_insert(void *arg)
 {
 	struct i915_ggtt *ggtt = arg;
@@ -1552,7 +1588,7 @@ static int igt_gtt_insert(void *arg)
 	/* Check a couple of obviously invalid requests */
 	for (ii = invalid_insert; ii->size; ii++) {
 		mutex_lock(&ggtt->vm.mutex);
-		err = i915_gem_gtt_insert(&ggtt->vm, &tmp,
+		err = i915_gem_gtt_insert(&ggtt->vm, NULL, &tmp,
 					  ii->size, ii->alignment,
 					  I915_COLOR_UNEVICTABLE,
 					  ii->start, ii->end,
@@ -1593,12 +1629,7 @@ static int igt_gtt_insert(void *arg)
 			goto out;
 		}
 
-		mutex_lock(&ggtt->vm.mutex);
-		err = i915_gem_gtt_insert(&ggtt->vm, &vma->node,
-					  obj->base.size, 0, obj->cache_level,
-					  0, ggtt->vm.total,
-					  0);
-		mutex_unlock(&ggtt->vm.mutex);
+		err = insert_gtt_with_resource(vma);
 		if (err == -ENOSPC) {
 			/* maxed out the GGTT space */
 			i915_gem_object_put(obj);
@@ -1647,18 +1678,13 @@ static int igt_gtt_insert(void *arg)
 		GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
 		offset = vma->node.start;
 
-		err = i915_vma_unbind(vma);
+		err = i915_vma_unbind_unlocked(vma);
 		if (err) {
 			pr_err("i915_vma_unbind failed with err=%d!\n", err);
 			goto out;
 		}
 
-		mutex_lock(&ggtt->vm.mutex);
-		err = i915_gem_gtt_insert(&ggtt->vm, &vma->node,
-					  obj->base.size, 0, obj->cache_level,
-					  0, ggtt->vm.total,
-					  0);
-		mutex_unlock(&ggtt->vm.mutex);
+		err = insert_gtt_with_resource(vma);
 		if (err) {
 			pr_err("i915_gem_gtt_insert (pass 2) failed at %llu/%llu with err=%d\n",
 			       total, ggtt->vm.total, err);
@@ -1702,12 +1728,7 @@ static int igt_gtt_insert(void *arg)
 			goto out;
 		}
 
-		mutex_lock(&ggtt->vm.mutex);
-		err = i915_gem_gtt_insert(&ggtt->vm, &vma->node,
-					  obj->base.size, 0, obj->cache_level,
-					  0, ggtt->vm.total,
-					  0);
-		mutex_unlock(&ggtt->vm.mutex);
+		err = insert_gtt_with_resource(vma);
 		if (err) {
 			pr_err("i915_gem_gtt_insert (pass 3) failed at %llu/%llu with err=%d\n",
 			       total, ggtt->vm.total, err);
@@ -1737,26 +1758,28 @@ int i915_gem_gtt_mock_selftests(void)
 		SUBTEST(igt_gtt_insert),
 	};
 	struct drm_i915_private *i915;
-	struct i915_ggtt *ggtt;
+	struct intel_gt *gt;
 	int err;
 
 	i915 = mock_gem_device();
 	if (!i915)
 		return -ENOMEM;
 
-	ggtt = kmalloc(sizeof(*ggtt), GFP_KERNEL);
-	if (!ggtt) {
-		err = -ENOMEM;
+	/* allocate the ggtt */
+	err = intel_gt_assign_ggtt(to_gt(i915));
+	if (err)
 		goto out_put;
-	}
-	mock_init_ggtt(i915, ggtt);
 
-	err = i915_subtests(tests, ggtt);
+	gt = to_gt(i915);
+
+	mock_init_ggtt(gt);
+
+	err = i915_subtests(tests, gt->ggtt);
 
 	mock_device_flush(i915);
 	i915_gem_drain_freed_objects(i915);
-	mock_fini_ggtt(ggtt);
-	kfree(ggtt);
+	mock_fini_ggtt(gt->ggtt);
+
 out_put:
 	mock_destroy_device(i915);
 	return err;
@@ -1939,6 +1962,7 @@ static int igt_cs_tlb(void *arg)
 			struct i915_vm_pt_stash stash = {};
 			struct i915_request *rq;
 			struct i915_gem_ww_ctx ww;
+			struct i915_vma_resource *vma_res;
 			u64 offset;
 
 			offset = igt_random_offset(&prng,
@@ -1959,6 +1983,13 @@ static int igt_cs_tlb(void *arg)
 			if (err)
 				goto end;
 
+			vma_res = i915_vma_resource_alloc();
+			if (IS_ERR(vma_res)) {
+				i915_vma_put_pages(vma);
+				err = PTR_ERR(vma_res);
+				goto end;
+			}
+
 			i915_gem_ww_ctx_init(&ww, false);
 retry:
 			err = i915_vm_lock_objects(vm, &ww);
@@ -1980,33 +2011,41 @@ static int igt_cs_tlb(void *arg)
 					goto retry;
 			}
 			i915_gem_ww_ctx_fini(&ww);
-			if (err)
+			if (err) {
+				kfree(vma_res);
 				goto end;
+			}
 
+			i915_vma_resource_init_from_vma(vma_res, vma);
 			/* Prime the TLB with the dummy pages */
 			for (i = 0; i < count; i++) {
-				vma->node.start = offset + i * PAGE_SIZE;
-				vm->insert_entries(vm, vma, I915_CACHE_NONE, 0);
+				vma_res->start = offset + i * PAGE_SIZE;
+				vm->insert_entries(vm, vma_res, I915_CACHE_NONE,
+						   0);
 
-				rq = submit_batch(ce, vma->node.start);
+				rq = submit_batch(ce, vma_res->start);
 				if (IS_ERR(rq)) {
 					err = PTR_ERR(rq);
+					i915_vma_resource_fini(vma_res);
+					kfree(vma_res);
 					goto end;
 				}
 				i915_request_put(rq);
 			}
-
+			i915_vma_resource_fini(vma_res);
 			i915_vma_put_pages(vma);
 
 			err = context_sync(ce);
 			if (err) {
 				pr_err("%s: dummy setup timed out\n",
 				       ce->engine->name);
+				kfree(vma_res);
 				goto end;
 			}
 
 			vma = i915_vma_instance(act, vm, NULL);
 			if (IS_ERR(vma)) {
+				kfree(vma_res);
 				err = PTR_ERR(vma);
 				goto end;
 			}
@@ -2014,19 +2053,22 @@ static int igt_cs_tlb(void *arg)
 			i915_gem_object_lock(act, NULL);
 			err = i915_vma_get_pages(vma);
 			i915_gem_object_unlock(act);
-			if (err)
+			if (err) {
+				kfree(vma_res);
 				goto end;
+			}
 
+			i915_vma_resource_init_from_vma(vma_res, vma);
 			/* Replace the TLB with target batches */
 			for (i = 0; i < count; i++) {
 				struct i915_request *rq;
 				u32 *cs = batch + i * 64 / sizeof(*cs);
 				u64 addr;
 
-				vma->node.start = offset + i * PAGE_SIZE;
-				vm->insert_entries(vm, vma, I915_CACHE_NONE, 0);
+				vma_res->start = offset + i * PAGE_SIZE;
+				vm->insert_entries(vm, vma_res, I915_CACHE_NONE, 0);
 
-				addr = vma->node.start + i * 64;
+				addr = vma_res->start + i * 64;
 				cs[4] = MI_NOOP;
 				cs[6] = lower_32_bits(addr);
 				cs[7] = upper_32_bits(addr);
@@ -2035,6 +2077,8 @@ static int igt_cs_tlb(void *arg)
 				rq = submit_batch(ce, addr);
 				if (IS_ERR(rq)) {
 					err = PTR_ERR(rq);
+					i915_vma_resource_fini(vma_res);
+					kfree(vma_res);
 					goto end;
 				}
 
@@ -2051,6 +2095,8 @@ static int igt_cs_tlb(void *arg)
 			}
 			end_spin(batch, count - 1);
 
+			i915_vma_resource_fini(vma_res);
+			kfree(vma_res);
 			i915_vma_put_pages(vma);
 
 			err = context_sync(ce);
@@ -2114,7 +2160,7 @@ int i915_gem_gtt_live_selftests(struct drm_i915_private *i915)
 		SUBTEST(igt_cs_tlb),
 	};
 
-	GEM_BUG_ON(offset_in_page(i915->ggtt.vm.total));
+	GEM_BUG_ON(offset_in_page(to_gt(i915)->ggtt->vm.total));
 
 	return i915_subtests(tests, i915);
 }
diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c
index 92a859b341904939c4d697f78b04698ef51b2d83..c56a0c2cd2f75c6bdcc374ee369c2a0749336245 100644
--- a/drivers/gpu/drm/i915/selftests/i915_request.c
+++ b/drivers/gpu/drm/i915/selftests/i915_request.c
@@ -26,6 +26,7 @@
 #include <linux/pm_qos.h>
 #include <linux/sort.h>
 
+#include "gem/i915_gem_internal.h"
 #include "gem/i915_gem_pm.h"
 #include "gem/selftests/mock_context.h"
 
@@ -782,6 +783,115 @@ static int __cancel_completed(struct intel_engine_cs *engine)
 	return err;
 }
 
+/*
+ * Test to prove a non-preemptible request can be cancelled and a subsequent
+ * request on the same context can successfully complete after cancellation.
+ *
+ * Testing methodology is to create a non-preemptible request and submit it,
+ * wait for spinner to start, create a NOP request and submit it, cancel the
+ * spinner, wait for the spinner to complete and verify it failed with an
+ * error, and finally wait for the NOP request to complete and verify it
+ * succeeded without an error. The preemption timeout is also reduced and
+ * restored so the test runs in a timely manner.
+ */
+static int __cancel_reset(struct drm_i915_private *i915,
+			  struct intel_engine_cs *engine)
+{
+	struct intel_context *ce;
+	struct igt_spinner spin;
+	struct i915_request *rq, *nop;
+	unsigned long preempt_timeout_ms;
+	int err = 0;
+
+	if (!CONFIG_DRM_I915_PREEMPT_TIMEOUT ||
+	    !intel_has_reset_engine(engine->gt))
+		return 0;
+
+	preempt_timeout_ms = engine->props.preempt_timeout_ms;
+	engine->props.preempt_timeout_ms = 100;
+
+	err = igt_spinner_init(&spin, engine->gt);
+	if (err)
+		goto out_restore;
+
+	ce = intel_context_create(engine);
+	if (IS_ERR(ce)) {
+		err = PTR_ERR(ce);
+		goto out_spin;
+	}
+
+	rq = igt_spinner_create_request(&spin, ce, MI_NOOP);
+	if (IS_ERR(rq)) {
+		err = PTR_ERR(rq);
+		goto out_ce;
+	}
+
+	pr_debug("%s: Cancelling active non-preemptible request\n",
+		 engine->name);
+	i915_request_get(rq);
+	i915_request_add(rq);
+	if (!igt_wait_for_spinner(&spin, rq)) {
+		struct drm_printer p = drm_info_printer(engine->i915->drm.dev);
+
+		pr_err("Failed to start spinner on %s\n", engine->name);
+		intel_engine_dump(engine, &p, "%s\n", engine->name);
+		err = -ETIME;
+		goto out_rq;
+	}
+
+	nop = intel_context_create_request(ce);
+	if (IS_ERR(nop)) {
+		err = PTR_ERR(nop);
+		goto out_rq;
+	}
+	i915_request_get(nop);
+	i915_request_add(nop);
+
+	i915_request_cancel(rq, -EINTR);
+
+	if (i915_request_wait(rq, 0, HZ) < 0) {
+		struct drm_printer p = drm_info_printer(engine->i915->drm.dev);
+
+		pr_err("%s: Failed to cancel hung request\n", engine->name);
+		intel_engine_dump(engine, &p, "%s\n", engine->name);
+		err = -ETIME;
+		goto out_nop;
+	}
+
+	if (rq->fence.error != -EINTR) {
+		pr_err("%s: fence not cancelled (%u)\n",
+		       engine->name, rq->fence.error);
+		err = -EINVAL;
+		goto out_nop;
+	}
+
+	if (i915_request_wait(nop, 0, HZ) < 0) {
+		struct drm_printer p = drm_info_printer(engine->i915->drm.dev);
+
+		pr_err("%s: Failed to complete nop request\n", engine->name);
+		intel_engine_dump(engine, &p, "%s\n", engine->name);
+		err = -ETIME;
+		goto out_nop;
+	}
+
+	if (nop->fence.error != 0) {
+		pr_err("%s: Nop request errored (%u)\n",
+		       engine->name, nop->fence.error);
+		err = -EINVAL;
+	}
+
+out_nop:
+	i915_request_put(nop);
+out_rq:
+	i915_request_put(rq);
+out_ce:
+	intel_context_put(ce);
+out_spin:
+	igt_spinner_fini(&spin);
+out_restore:
+	engine->props.preempt_timeout_ms = preempt_timeout_ms;
+	if (err)
+		pr_err("%s: %s error %d\n", __func__, engine->name, err);
+	return err;
+}
+
 static int live_cancel_request(void *arg)
 {
 	struct drm_i915_private *i915 = arg;
@@ -814,6 +924,14 @@ static int live_cancel_request(void *arg)
 			return err;
 		if (err2)
 			return err2;
+
+		/* Expects a reset, so call outside of igt_live_test_* */
+		err = __cancel_reset(i915, engine);
+		if (err)
+			return err;
+
+		if (igt_flush_test(i915))
+			return -EIO;
 	}
 
 	return 0;
@@ -843,7 +961,7 @@ static struct i915_vma *empty_batch(struct drm_i915_private *i915)
 
 	intel_gt_chipset_flush(to_gt(i915));
 
-	vma = i915_vma_instance(obj, &i915->ggtt.vm, NULL);
+	vma = i915_vma_instance(obj, &to_gt(i915)->ggtt->vm, NULL);
 	if (IS_ERR(vma)) {
 		err = PTR_ERR(vma);
 		goto err;
diff --git a/drivers/gpu/drm/i915/selftests/i915_selftest.c b/drivers/gpu/drm/i915/selftests/i915_selftest.c
index 2d6d7bd13c3c771f547b74442ac3d72ad85bc269..c4e932368b37e751e83641f5195e7a48037acbe0 100644
--- a/drivers/gpu/drm/i915/selftests/i915_selftest.c
+++ b/drivers/gpu/drm/i915/selftests/i915_selftest.c
@@ -24,6 +24,7 @@
 #include <linux/random.h>
 
 #include "gt/intel_gt_pm.h"
+#include "i915_driver.h"
 #include "i915_drv.h"
 #include "i915_selftest.h"
 
diff --git a/drivers/gpu/drm/i915/selftests/i915_vma.c b/drivers/gpu/drm/i915/selftests/i915_vma.c
index 5c5809dfe9b254bb08f9d66c9b478fbf802d731f..6921ba1280151b8d53de065726b3aae79a1efe6c 100644
--- a/drivers/gpu/drm/i915/selftests/i915_vma.c
+++ b/drivers/gpu/drm/i915/selftests/i915_vma.c
@@ -25,6 +25,7 @@
 #include <linux/prime_numbers.h>
 
 #include "gem/i915_gem_context.h"
+#include "gem/i915_gem_internal.h"
 #include "gem/selftests/mock_context.h"
 
 #include "i915_scatterlist.h"
@@ -340,7 +341,7 @@ static int igt_vma_pin1(void *arg)
 
 		if (!err) {
 			i915_vma_unpin(vma);
-			err = i915_vma_unbind(vma);
+			err = i915_vma_unbind_unlocked(vma);
 			if (err) {
 				pr_err("Failed to unbind single page from GGTT, err=%d\n", err);
 				goto out;
@@ -691,7 +692,7 @@ static int igt_vma_rotate_remap(void *arg)
 					}
 
 					i915_vma_unpin(vma);
-					err = i915_vma_unbind(vma);
+					err = i915_vma_unbind_unlocked(vma);
 					if (err) {
 						pr_err("Unbinding returned %i\n", err);
 						goto out_object;
@@ -852,7 +853,7 @@ static int igt_vma_partial(void *arg)
 
 				i915_vma_unpin(vma);
 				nvma++;
-				err = i915_vma_unbind(vma);
+				err = i915_vma_unbind_unlocked(vma);
 				if (err) {
 					pr_err("Unbinding returned %i\n", err);
 					goto out_object;
@@ -891,7 +892,7 @@ static int igt_vma_partial(void *arg)
 
 		i915_vma_unpin(vma);
 
-		err = i915_vma_unbind(vma);
+		err = i915_vma_unbind_unlocked(vma);
 		if (err) {
 			pr_err("Unbinding returned %i\n", err);
 			goto out_object;
@@ -922,26 +923,28 @@ int i915_vma_mock_selftests(void)
 		SUBTEST(igt_vma_partial),
 	};
 	struct drm_i915_private *i915;
-	struct i915_ggtt *ggtt;
+	struct intel_gt *gt;
 	int err;
 
 	i915 = mock_gem_device();
 	if (!i915)
 		return -ENOMEM;
 
-	ggtt = kmalloc(sizeof(*ggtt), GFP_KERNEL);
-	if (!ggtt) {
-		err = -ENOMEM;
+	/* allocate the ggtt */
+	err = intel_gt_assign_ggtt(to_gt(i915));
+	if (err)
 		goto out_put;
-	}
-	mock_init_ggtt(i915, ggtt);
 
-	err = i915_subtests(tests, ggtt);
+	gt = to_gt(i915);
+
+	mock_init_ggtt(gt);
+
+	err = i915_subtests(tests, gt->ggtt);
 
 	mock_device_flush(i915);
 	i915_gem_drain_freed_objects(i915);
-	mock_fini_ggtt(ggtt);
-	kfree(ggtt);
+	mock_fini_ggtt(gt->ggtt);
+
 out_put:
 	mock_destroy_device(i915);
 	return err;
@@ -982,7 +985,7 @@ static int igt_vma_remapped_gtt(void *arg)
 	intel_wakeref_t wakeref;
 	int err = 0;
 
-	if (!i915_ggtt_has_aperture(&i915->ggtt))
+	if (!i915_ggtt_has_aperture(to_gt(i915)->ggtt))
 		return 0;
 
 	obj = i915_gem_object_create_internal(i915, 10 * 10 * PAGE_SIZE);
diff --git a/drivers/gpu/drm/i915/selftests/igt_flush_test.c b/drivers/gpu/drm/i915/selftests/igt_flush_test.c
index b84594601d30cc8cb8e27bf19c0c87e823d1932d..b484e12df41786482ddf7eac9275892336f7e029 100644
--- a/drivers/gpu/drm/i915/selftests/igt_flush_test.c
+++ b/drivers/gpu/drm/i915/selftests/igt_flush_test.c
@@ -19,7 +19,7 @@ int igt_flush_test(struct drm_i915_private *i915)
 
 	cond_resched();
 
-	if (intel_gt_wait_for_idle(gt, HZ) == -ETIME) {
+	if (intel_gt_wait_for_idle(gt, HZ * 3) == -ETIME) {
 		pr_err("%pS timed out, cancelling all further testing.\n",
 		       __builtin_return_address(0));
 
diff --git a/drivers/gpu/drm/i915/selftests/igt_spinner.c b/drivers/gpu/drm/i915/selftests/igt_spinner.c
index 24d87d0fc747e0413c5edc3a239069d326100116..0c22594ae274694b6e9226d9d8a246783728a176 100644
--- a/drivers/gpu/drm/i915/selftests/igt_spinner.c
+++ b/drivers/gpu/drm/i915/selftests/igt_spinner.c
@@ -6,6 +6,7 @@
 #include "gt/intel_gpu_commands.h"
 #include "gt/intel_gt.h"
 
+#include "gem/i915_gem_internal.h"
 #include "gem/selftests/igt_gem_utils.h"
 
 #include "igt_spinner.h"
diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
index 8aa7b1d338659c2c249fe6e21b7f91fd0427f174..573d9b2e1a4a3cde2d3ee304e849d08fa7d99fdd 100644
--- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c
+++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
@@ -69,7 +69,7 @@ static void mock_device_release(struct drm_device *dev)
 	i915_gem_drain_workqueue(i915);
 	i915_gem_drain_freed_objects(i915);
 
-	mock_fini_ggtt(&i915->ggtt);
+	mock_fini_ggtt(to_gt(i915)->ggtt);
 	destroy_workqueue(i915->wq);
 
 	intel_region_ttm_device_fini(i915);
@@ -161,6 +161,8 @@ struct drm_i915_private *mock_gem_device(void)
 	i915_params_copy(&i915->params, &i915_modparams);
 
 	intel_runtime_pm_init_early(&i915->runtime_pm);
+	/* wakeref tracking has significant overhead */
+	i915->runtime_pm.no_wakeref_tracking = true;
 
 	/* Using the global GTT may ask questions about KMS users, so prepare */
 	drm_mode_config_init(&i915->drm);
@@ -194,8 +196,13 @@ struct drm_i915_private *mock_gem_device(void)
 
 	mock_init_contexts(i915);
 
-	mock_init_ggtt(i915, &i915->ggtt);
-	to_gt(i915)->vm = i915_vm_get(&i915->ggtt.vm);
+	/* allocate the ggtt */
+	ret = intel_gt_assign_ggtt(to_gt(i915));
+	if (ret)
+		goto err_unlock;
+
+	mock_init_ggtt(to_gt(i915));
+	to_gt(i915)->vm = i915_vm_get(&to_gt(i915)->ggtt->vm);
 
 	mkwrite_device_info(i915)->platform_engine_mask = BIT(0);
 	to_gt(i915)->info.engine_mask = BIT(0);
diff --git a/drivers/gpu/drm/i915/selftests/mock_gtt.c b/drivers/gpu/drm/i915/selftests/mock_gtt.c
index 1802baf80a17a11f3f453025581ba4a779ec667b..568840e7ca66e191adb5caff63557d637860952d 100644
--- a/drivers/gpu/drm/i915/selftests/mock_gtt.c
+++ b/drivers/gpu/drm/i915/selftests/mock_gtt.c
@@ -33,23 +33,23 @@ static void mock_insert_page(struct i915_address_space *vm,
 }
 
 static void mock_insert_entries(struct i915_address_space *vm,
-				struct i915_vma *vma,
+				struct i915_vma_resource *vma_res,
 				enum i915_cache_level level, u32 flags)
 {
 }
 
 static void mock_bind_ppgtt(struct i915_address_space *vm,
 			    struct i915_vm_pt_stash *stash,
-			    struct i915_vma *vma,
+			    struct i915_vma_resource *vma_res,
 			    enum i915_cache_level cache_level,
 			    u32 flags)
 {
 	GEM_BUG_ON(flags & I915_VMA_GLOBAL_BIND);
-	set_bit(I915_VMA_LOCAL_BIND_BIT, __i915_vma_flags(vma));
+	vma_res->bound_flags |= flags;
 }
 
 static void mock_unbind_ppgtt(struct i915_address_space *vm,
-			      struct i915_vma *vma)
+			      struct i915_vma_resource *vma_res)
 {
 }
 
@@ -93,23 +93,23 @@ struct i915_ppgtt *mock_ppgtt(struct drm_i915_private *i915, const char *name)
 
 static void mock_bind_ggtt(struct i915_address_space *vm,
 			   struct i915_vm_pt_stash *stash,
-			   struct i915_vma *vma,
+			   struct i915_vma_resource *vma_res,
 			   enum i915_cache_level cache_level,
 			   u32 flags)
 {
 }
 
 static void mock_unbind_ggtt(struct i915_address_space *vm,
-			     struct i915_vma *vma)
+			     struct i915_vma_resource *vma_res)
 {
 }
 
-void mock_init_ggtt(struct drm_i915_private *i915, struct i915_ggtt *ggtt)
+void mock_init_ggtt(struct intel_gt *gt)
 {
-	memset(ggtt, 0, sizeof(*ggtt));
+	struct i915_ggtt *ggtt = gt->ggtt;
 
-	ggtt->vm.gt = to_gt(i915);
-	ggtt->vm.i915 = i915;
+	ggtt->vm.gt = gt;
+	ggtt->vm.i915 = gt->i915;
 	ggtt->vm.is_ggtt = true;
 
 	ggtt->gmadr = (struct resource) DEFINE_RES_MEM(0, 2048 * PAGE_SIZE);
@@ -128,7 +128,6 @@ void mock_init_ggtt(struct drm_i915_private *i915, struct i915_ggtt *ggtt)
 	ggtt->vm.vma_ops.unbind_vma  = mock_unbind_ggtt;
 
 	i915_address_space_init(&ggtt->vm, VM_CLASS_GGTT);
-	to_gt(i915)->ggtt = ggtt;
 }
 
 void mock_fini_ggtt(struct i915_ggtt *ggtt)
diff --git a/drivers/gpu/drm/i915/selftests/mock_gtt.h b/drivers/gpu/drm/i915/selftests/mock_gtt.h
index e3f224f43beb6fbd83bb07ce98a58dad315cee32..d6eb90bd7f3f09e86110faaa26d29ffa7c8cd657 100644
--- a/drivers/gpu/drm/i915/selftests/mock_gtt.h
+++ b/drivers/gpu/drm/i915/selftests/mock_gtt.h
@@ -27,8 +27,9 @@
 
 struct drm_i915_private;
 struct i915_ggtt;
+struct intel_gt;
 
-void mock_init_ggtt(struct drm_i915_private *i915, struct i915_ggtt *ggtt);
+void mock_init_ggtt(struct intel_gt *gt);
 void mock_fini_ggtt(struct i915_ggtt *ggtt);
 
 struct i915_ppgtt *mock_ppgtt(struct drm_i915_private *i915, const char *name);
diff --git a/include/drm/dp/drm_dp_helper.h b/include/drm/dp/drm_dp_helper.h
index 98d020835b49c61af201f37dbf83f0c9bc465eed..69487bd8ed561fde17ca73e09af821c1e54e4cf8 100644
--- a/include/drm/dp/drm_dp_helper.h
+++ b/include/drm/dp/drm_dp_helper.h
@@ -560,6 +560,7 @@ struct drm_panel;
 # define DP_TRAINING_PATTERN_DISABLE	    0
 # define DP_TRAINING_PATTERN_1		    1
 # define DP_TRAINING_PATTERN_2		    2
+# define DP_TRAINING_PATTERN_2_CDS	    3	    /* 2.0 E11 */
 # define DP_TRAINING_PATTERN_3		    3	    /* 1.2 */
 # define DP_TRAINING_PATTERN_4              7       /* 1.4 */
 # define DP_TRAINING_PATTERN_MASK	    0x3
@@ -738,11 +739,13 @@ struct drm_panel;
 			    DP_LANE_CHANNEL_EQ_DONE |	\
 			    DP_LANE_SYMBOL_LOCKED)
 
-#define DP_LANE_ALIGN_STATUS_UPDATED	    0x204
-
-#define DP_INTERLANE_ALIGN_DONE		    (1 << 0)
-#define DP_DOWNSTREAM_PORT_STATUS_CHANGED   (1 << 6)
-#define DP_LINK_STATUS_UPDATED		    (1 << 7)
+#define DP_LANE_ALIGN_STATUS_UPDATED                    0x204
+#define  DP_INTERLANE_ALIGN_DONE                        (1 << 0)
+#define  DP_128B132B_DPRX_EQ_INTERLANE_ALIGN_DONE       (1 << 2) /* 2.0 E11 */
+#define  DP_128B132B_DPRX_CDS_INTERLANE_ALIGN_DONE      (1 << 3) /* 2.0 E11 */
+#define  DP_128B132B_LT_FAILED                          (1 << 4) /* 2.0 E11 */
+#define  DP_DOWNSTREAM_PORT_STATUS_CHANGED              (1 << 6)
+#define  DP_LINK_STATUS_UPDATED                         (1 << 7)
 
 #define DP_SINK_STATUS			    0x205
 # define DP_RECEIVE_PORT_0_STATUS	    (1 << 0)
@@ -1112,6 +1115,7 @@ struct drm_panel;
 # define DP_UHBR13_5                           (1 << 2)
 
 #define DP_128B132B_TRAINING_AUX_RD_INTERVAL                    0x2216 /* 2.0 */
+# define DP_128B132B_TRAINING_AUX_RD_INTERVAL_1MS_UNIT          (1 << 7)
 # define DP_128B132B_TRAINING_AUX_RD_INTERVAL_MASK              0x7f
 # define DP_128B132B_TRAINING_AUX_RD_INTERVAL_400_US            0x00
 # define DP_128B132B_TRAINING_AUX_RD_INTERVAL_4_MS              0x01
@@ -1347,6 +1351,7 @@ struct drm_panel;
 # define DP_PHY_REPEATER_128B132B_SUPPORTED		    (1 << 0)
 /* See DP_128B132B_SUPPORTED_LINK_RATES for values */
 #define DP_PHY_REPEATER_128B132B_RATES			    0xf0007 /* 2.0 */
+#define DP_PHY_REPEATER_EQ_DONE                             0xf0008 /* 2.0 E11 */
 
 enum drm_dp_phy {
 	DP_PHY_DPRX,
@@ -1549,6 +1554,15 @@ void drm_dp_link_train_channel_eq_delay(const struct drm_dp_aux *aux,
 void drm_dp_lttpr_link_train_channel_eq_delay(const struct drm_dp_aux *aux,
 					      const u8 caps[DP_LTTPR_PHY_CAP_SIZE]);
 
+int drm_dp_128b132b_read_aux_rd_interval(struct drm_dp_aux *aux);
+bool drm_dp_128b132b_lane_channel_eq_done(const u8 link_status[DP_LINK_STATUS_SIZE],
+					  int lane_count);
+bool drm_dp_128b132b_lane_symbol_locked(const u8 link_status[DP_LINK_STATUS_SIZE],
+					int lane_count);
+bool drm_dp_128b132b_eq_interlane_align_done(const u8 link_status[DP_LINK_STATUS_SIZE]);
+bool drm_dp_128b132b_cds_interlane_align_done(const u8 link_status[DP_LINK_STATUS_SIZE]);
+bool drm_dp_128b132b_link_training_failed(const u8 link_status[DP_LINK_STATUS_SIZE]);
+
 u8 drm_dp_link_rate_to_bw_code(int link_rate);
 int drm_dp_bw_code_to_link_rate(u8 link_bw);
 
diff --git a/include/linux/iosys-map.h b/include/linux/iosys-map.h
index f4186f91caa6db51a1b2bb2cd1a813da6480fe8e..e69a002d5aa4a085a41db42af4238051ce744f6b 100644
--- a/include/linux/iosys-map.h
+++ b/include/linux/iosys-map.h
@@ -120,6 +120,45 @@ struct iosys_map {
 		.is_iomem = false,	\
 	}
 
+/**
+ * IOSYS_MAP_INIT_OFFSET - Initializes struct iosys_map from another iosys_map
+ * @map_:	The iosys_map structure to copy from
+ * @offset_:	Offset to add to the other mapping
+ *
+ * Initializes a new iosys_map struct based on another passed as argument. It
+ * does a shallow copy of the struct so it's possible to update the backing
+ * storage without changing where the original map points to. It is the
+ * equivalent of doing:
+ *
+ * .. code-block:: c
+ *
+ *	struct iosys_map map = *other_map;
+ *	iosys_map_incr(&map, offset);
+ *
+ * Example usage:
+ *
+ * .. code-block:: c
+ *
+ *	void foo(struct device *dev, struct iosys_map *base_map)
+ *	{
+ *		...
+ *		struct iosys_map map = IOSYS_MAP_INIT_OFFSET(base_map, FIELD_OFFSET);
+ *		...
+ *	}
+ *
+ * The advantage of using the initializer over just increasing the offset with
+ * iosys_map_incr() as above is that the new map always points to the right
+ * offset in the buffer for its whole scope. It reduces the risk of updating
+ * the wrong part of the buffer without any compiler warning about it. If the
+ * assignment to IOSYS_MAP_INIT_OFFSET() is forgotten, the compiler can warn
+ * about the use of an uninitialized variable.
+ */
+#define IOSYS_MAP_INIT_OFFSET(map_, offset_) ({				\
+	struct iosys_map copy = *map_;					\
+	iosys_map_incr(&copy, offset_);					\
+	copy;								\
+})
+
 /**
  * iosys_map_set_vaddr - Sets a iosys mapping structure to an address in system memory
  * @map:	The iosys_map structure
@@ -220,22 +259,43 @@ static inline void iosys_map_clear(struct iosys_map *map)
 }
 
 /**
- * iosys_map_memcpy_to - Memcpy into iosys mapping
+ * iosys_map_memcpy_to - Memcpy into offset of iosys_map
  * @dst:	The iosys_map structure
+ * @dst_offset:	The offset within @dst at which to start copying
  * @src:	The source buffer
  * @len:	The number of bytes in src
  *
- * Copies data into a iosys mapping. The source buffer is in system
- * memory. Depending on the buffer's location, the helper picks the correct
- * method of accessing the memory.
+ * Copies data into an iosys_map at the given offset. The source buffer is in
+ * system memory. Depending on the buffer's location, the helper picks the
+ * correct method of accessing the memory.
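+ *
+ * A minimal usage sketch, assuming ``map`` was set up earlier with
+ * iosys_map_set_vaddr() or a similar initializer (the offset and payload
+ * are arbitrary):
+ *
+ * .. code-block:: c
+ *
+ *	const u8 hdr[4] = { 0x00, 0x01, 0x02, 0x03 };
+ *
+ *	iosys_map_memcpy_to(&map, 16, hdr, sizeof(hdr));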
  */
-static inline void iosys_map_memcpy_to(struct iosys_map *dst, const void *src,
-				       size_t len)
+static inline void iosys_map_memcpy_to(struct iosys_map *dst, size_t dst_offset,
+				       const void *src, size_t len)
 {
 	if (dst->is_iomem)
-		memcpy_toio(dst->vaddr_iomem, src, len);
+		memcpy_toio(dst->vaddr_iomem + dst_offset, src, len);
+	else
+		memcpy(dst->vaddr + dst_offset, src, len);
+}
+
+/**
+ * iosys_map_memcpy_from - Memcpy from iosys_map into system memory
+ * @dst:	Destination in system memory
+ * @src:	The iosys_map structure
+ * @src_offset:	The offset from which to copy
+ * @len:	The number of bytes to copy
+ *
+ * Copies data from an iosys_map at the given offset. The destination buffer
+ * is in system memory. Depending on the mapping location, the helper picks
+ * the correct method of accessing the memory.
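+ *
+ * A minimal usage sketch, mirroring the iosys_map_memcpy_to() example above
+ * (``map``, the offset and the length are assumptions):
+ *
+ * .. code-block:: c
+ *
+ *	u8 hdr[4];
+ *
+ *	iosys_map_memcpy_from(hdr, &map, 16, sizeof(hdr));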
+ */
+static inline void iosys_map_memcpy_from(void *dst, const struct iosys_map *src,
+					 size_t src_offset, size_t len)
+{
+	if (src->is_iomem)
+		memcpy_fromio(dst, src->vaddr_iomem + src_offset, len);
 	else
-		memcpy(dst->vaddr, src, len);
+		memcpy(dst, src->vaddr + src_offset, len);
 }
 
 /**
@@ -254,4 +314,146 @@ static inline void iosys_map_incr(struct iosys_map *map, size_t incr)
 		map->vaddr += incr;
 }
 
+/**
+ * iosys_map_memset - Memset iosys_map
+ * @dst:	The iosys_map structure
+ * @offset:	Offset within @dst at which to start setting the value
+ * @value:	The value to set
+ * @len:	The number of bytes to set in dst
+ *
+ * Sets a value in an iosys_map buffer. Depending on the buffer's location,
+ * the helper picks the correct method of accessing the memory.
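+ *
+ * For example, zeroing a 64-byte region starting 128 bytes into ``map``
+ * (both values are arbitrary here):
+ *
+ * .. code-block:: c
+ *
+ *	iosys_map_memset(&map, 128, 0, 64);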
+ */
+static inline void iosys_map_memset(struct iosys_map *dst, size_t offset,
+				    int value, size_t len)
+{
+	if (dst->is_iomem)
+		memset_io(dst->vaddr_iomem + offset, value, len);
+	else
+		memset(dst->vaddr + offset, value, len);
+}
+
+/**
+ * iosys_map_rd - Read a C-type value from the iosys_map
+ *
+ * @map__:	The iosys_map structure
+ * @offset__:	The offset from which to read
+ * @type__:	Type of the value being read
+ *
+ * Read a C-type value from the iosys_map, handling possible un-aligned accesses to
+ * the mapping.
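+ *
+ * A short sketch of the intended use, assuming ``map`` was initialized
+ * elsewhere and that its buffer starts with a u32 (both are assumptions for
+ * illustration):
+ *
+ * .. code-block:: c
+ *
+ *	u32 magic = iosys_map_rd(&map, 0, u32);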
+ *
+ * Returns:
+ * The value read from the mapping.
+ */
+#define iosys_map_rd(map__, offset__, type__) ({			\
+	type__ val;							\
+	iosys_map_memcpy_from(&val, map__, offset__, sizeof(val));	\
+	val;								\
+})
+
+/**
+ * iosys_map_wr - Write a C-type value to the iosys_map
+ *
+ * @map__:	The iosys_map structure
+ * @offset__:	The offset from the mapping to write to
+ * @type__:	Type of the value being written
+ * @val__:	Value to write
+ *
+ * Write a C-type value to the iosys_map, handling possible un-aligned accesses
+ * to the mapping.
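+ *
+ * For example, writing a u16 value 8 bytes into ``map`` (the offset, type
+ * and value here are assumptions for illustration):
+ *
+ * .. code-block:: c
+ *
+ *	iosys_map_wr(&map, 8, u16, 0xcafe);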
+ */
+#define iosys_map_wr(map__, offset__, type__, val__) ({			\
+	type__ val = (val__);						\
+	iosys_map_memcpy_to(map__, offset__, &val, sizeof(val));	\
+})
+
+/**
+ * iosys_map_rd_field - Read a member from a struct in the iosys_map
+ *
+ * @map__:		The iosys_map structure
+ * @struct_offset__:	Offset from the beginning of the map, where the struct
+ *			is located
+ * @struct_type__:	The struct describing the layout of the mapping
+ * @field__:		Member of the struct to read
+ *
+ * Read a value from the iosys_map considering its layout is described by a C
+ * struct starting at @struct_offset__. The field offset and size are
+ * calculated and its value read, handling possible un-aligned memory accesses.
+ * For example: suppose there is a &struct foo defined as below and the value
+ * ``foo.field2.inner2`` needs to be read from the iosys_map:
+ *
+ * .. code-block:: c
+ *
+ *	struct foo {
+ *		int field1;
+ *		struct {
+ *			int inner1;
+ *			int inner2;
+ *		} field2;
+ *		int field3;
+ *	} __packed;
+ *
+ * This is the expected memory layout of a buffer using iosys_map_rd_field():
+ *
+ * +------------------------------+--------------------------+
+ * | Address                      | Content                  |
+ * +==============================+==========================+
+ * | buffer + 0000                | start of mmapped buffer  |
+ * |                              | pointed by iosys_map     |
+ * +------------------------------+--------------------------+
+ * | ...                          | ...                      |
+ * +------------------------------+--------------------------+
+ * | buffer + ``struct_offset__`` | start of ``struct foo``  |
+ * +------------------------------+--------------------------+
+ * | ...                          | ...                      |
+ * +------------------------------+--------------------------+
+ * | buffer + wwww                | ``foo.field2.inner2``    |
+ * +------------------------------+--------------------------+
+ * | ...                          | ...                      |
+ * +------------------------------+--------------------------+
+ * | buffer + yyyy                | end of ``struct foo``    |
+ * +------------------------------+--------------------------+
+ * | ...                          | ...                      |
+ * +------------------------------+--------------------------+
+ * | buffer + zzzz                | end of mmapped buffer    |
+ * +------------------------------+--------------------------+
+ *
+ * Values automatically calculated by this macro or not needed are denoted by
+ * wwww, yyyy and zzzz. This is the code to read that value:
+ *
+ * .. code-block:: c
+ *
+ *	x = iosys_map_rd_field(&map, offset, struct foo, field2.inner2);
+ *
+ * Returns:
+ * The value read from the mapping.
+ */
+#define iosys_map_rd_field(map__, struct_offset__, struct_type__, field__) ({	\
+	struct_type__ *s;							\
+	iosys_map_rd(map__, struct_offset__ + offsetof(struct_type__, field__),	\
+		     typeof(s->field__));					\
+})
+
+/**
+ * iosys_map_wr_field - Write to a member of a struct in the iosys_map
+ *
+ * @map__:		The iosys_map structure
+ * @struct_offset__:	Offset from the beginning of the map, where the struct
+ *			is located
+ * @struct_type__:	The struct describing the layout of the mapping
+ * @field__:		Member of the struct to write
+ * @val__:		Value to write
+ *
+ * Write a value to the iosys_map considering its layout is described by a C
+ * struct starting at @struct_offset__. The field offset and size are
+ * calculated and @val__ is written, handling possible un-aligned memory
+ * accesses. Refer to iosys_map_rd_field() for expected usage and memory
+ * layout.
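+ *
+ * Reusing the hypothetical ``struct foo`` from the iosys_map_rd_field()
+ * example, the write counterpart would look like:
+ *
+ * .. code-block:: c
+ *
+ *	iosys_map_wr_field(&map, offset, struct foo, field2.inner2, 0x1234);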
+ */
+#define iosys_map_wr_field(map__, struct_offset__, struct_type__, field__, val__) ({	\
+	struct_type__ *s;								\
+	iosys_map_wr(map__, struct_offset__ + offsetof(struct_type__, field__),		\
+		     typeof(s->field__), val__);					\
+})
+
 #endif /* __IOSYS_MAP_H__ */