Commit c05f3642 authored by Linus Torvalds

Merge branch 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull perf updates from Ingo Molnar:
 "The main updates in this cycle were:

   - Lots of perf tooling changes too voluminous to list (big perf trace
     and perf stat improvements, lots of libtraceevent reorganization,
     etc.), so I'll list the authors and refer to the changelog for
     details:

       Benjamin Peterson, Jérémie Galarneau, Kim Phillips, Peter
       Zijlstra, Ravi Bangoria, Sangwon Hong, Sean V Kelley, Steven
       Rostedt, Thomas Gleixner, Ding Xiang, Eduardo Habkost, Thomas
       Richter, Andi Kleen, Sanskriti Sharma, Adrian Hunter, Tzvetomir
       Stoyanov, Arnaldo Carvalho de Melo, Jiri Olsa.

     ... with the bulk of the changes written by Jiri Olsa, Tzvetomir
     Stoyanov and Arnaldo Carvalho de Melo.

   - Continued intel_rdt work with a focus on playing well with perf
     events. This also imported some non-perf RDT work due to
     dependencies. (Reinette Chatre)

   - Implement counter freezing for Arch Perfmon v4 (Skylake and newer).
     This speeds up the PMI handler by avoiding unnecessary MSR writes
     and makes it more accurate; a rough sketch follows this list. (Andi Kleen)

   - kprobes cleanups and simplification (Masami Hiramatsu)

   - Intel Goldmont PMU updates (Kan Liang)

   - ... plus misc other fixes and updates"
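
For orientation, here is a rough sketch of the counter-freezing idea mentioned in the list above. This is not the code merged by this series, and counter_freezing_pmi_sketch() is a made-up name; it only illustrates the point under the assumption of an Arch Perfmon v4 PMU with FREEZE_PERFMON_ON_PMI set in MSR_IA32_DEBUGCTLMSR, where the hardware freezes the counters when the PMI is raised, so the handler can skip the global disable/re-enable MSR writes that older handlers need around overflow processing.

#include <asm/msr.h>		/* rdmsrl()/wrmsrl() */
#include <asm/msr-index.h>	/* MSR_CORE_PERF_GLOBAL_* */

/* Illustrative PMI-handler skeleton, not the merged implementation. */
static void counter_freezing_pmi_sketch(void)
{
	u64 status;

	/*
	 * Pre-v4 handlers bracket overflow processing with two extra MSR
	 * writes: clear MSR_CORE_PERF_GLOBAL_CTRL on entry and restore it
	 * on exit.  With counter freezing the hardware has already stopped
	 * the counters by the time the PMI handler runs.
	 */
	rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);

	/* ... handle the overflowed counters indicated by status ... */

	/*
	 * Ack the status bits; on v4 this also clears the freeze bit so
	 * counting resumes, again without touching GLOBAL_CTRL.
	 */
	wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, status);
}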

* 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (155 commits)
  kprobes/x86: Use preempt_enable() in optimized_callback()
  x86/intel_rdt: Prevent pseudo-locking from using stale pointers
  kprobes, x86/ptrace.h: Make regs_get_kernel_stack_nth() not fault on bad stack
  perf/x86/intel: Export mem events only if there's PEBS support
  x86/cpu: Drop pointless static qualifier in punit_dev_state_show()
  x86/intel_rdt: Fix initial allocation to consider CDP
  x86/intel_rdt: CBM overlap should also check for overlap with CDP peer
  x86/intel_rdt: Introduce utility to obtain CDP peer
  tools lib traceevent, perf tools: Move struct tep_handler definition in a local header file
  tools lib traceevent: Separate out tep_strerror() for strerror_r() issues
  perf python: More portable way to make CFLAGS work with clang
  perf python: Make clang_has_option() work on Python 3
  perf tools: Free temporary 'sys' string in read_event_files()
  perf tools: Avoid double free in read_event_file()
  perf tools: Free 'printk' string in parse_ftrace_printk()
  perf tools: Cleanup trace-event-info 'tdata' leak
  perf strbuf: Match va_{add,copy} with va_end
  perf test: S390 does not support watchpoints in test 22
  perf auxtrace: Include missing asm/bitsperlong.h to get BITS_PER_LONG
  tools include: Adopt linux/bits.h
  ...
parents 0200fbdd dda93b45
......@@ -856,6 +856,11 @@
causing system reset or hang due to sending
INIT from AP to BSP.
disable_counter_freezing [HW]
Disable Intel PMU counter freezing feature.
The feature only exists starting from
Arch Perfmon v4 (Skylake and newer).
disable_ddw [PPC/PSERIES]
Disable Dynamic DMA Window support. Use this
to work around buggy firmware.
......
......@@ -520,18 +520,24 @@ the pseudo-locked region:
2) Cache hit and miss measurements using model specific precision counters if
available. Depending on the levels of cache on the system the pseudo_lock_l2
and pseudo_lock_l3 tracepoints are available.
WARNING: triggering this measurement uses from two (for just L2
measurements) to four (for L2 and L3 measurements) precision counters on
the system; if any other measurements are in progress, the counters and
their corresponding event registers will be clobbered.
When a pseudo-locked region is created a new debugfs directory is created for
it in debugfs as /sys/kernel/debug/resctrl/<newdir>. A single
write-only file, pseudo_lock_measure, is present in this directory. The
measurement on the pseudo-locked region depends on the number, 1 or 2,
written to this debugfs file. Since the measurements are recorded with the
tracing infrastructure the relevant tracepoints need to be enabled before the
measurement is triggered.
measurement of the pseudo-locked region depends on the number written to this
debugfs file:
1 - writing "1" to the pseudo_lock_measure file will trigger the latency
measurement captured in the pseudo_lock_mem_latency tracepoint. See
example below.
2 - writing "2" to the pseudo_lock_measure file will trigger the L2 cache
residency (cache hits and misses) measurement captured in the
pseudo_lock_l2 tracepoint. See example below.
3 - writing "3" to the pseudo_lock_measure file will trigger the L3 cache
residency (cache hits and misses) measurement captured in the
pseudo_lock_l3 tracepoint.
All measurements are recorded with the tracing infrastructure. This requires
the relevant tracepoints to be enabled before the measurement is triggered.
Example of latency debugging interface:
In this example a pseudo-locked region named "newlock" was created. Here is
......
......@@ -1033,6 +1033,27 @@ static inline void x86_assign_hw_event(struct perf_event *event,
}
}
/**
* x86_perf_rdpmc_index - Return PMC counter used for event
* @event: the perf_event to which the PMC counter was assigned
*
* The counter assigned to this performance event may change if interrupts
* are enabled. This counter should thus never be used while interrupts are
* enabled. Before this function is used to obtain the assigned counter the
* event should be checked for validity using, for example,
* perf_event_read_local(), within the same interrupt disabled section in
* which this counter is planned to be used.
*
* Return: The index of the performance monitoring counter assigned to
* @event.
*/
int x86_perf_rdpmc_index(struct perf_event *event)
{
lockdep_assert_irqs_disabled();
return event->hw.event_base_rdpmc;
}
static inline int match_prev_assignment(struct hw_perf_event *hwc,
struct cpu_hw_events *cpuc,
int i)
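
The x86_perf_rdpmc_index() kerneldoc above describes a usage pattern rather than showing one, so here is a minimal caller sketch. It is hypothetical kernel code (read_event_pmc() is not part of this series) and only illustrates keeping perf_event_read_local() and x86_perf_rdpmc_index() inside the same interrupt-disabled section, as the comment requires.

#include <linux/irqflags.h>	/* local_irq_save()/restore() */
#include <linux/perf_event.h>	/* perf_event_read_local() */
#include <asm/msr.h>		/* rdpmcl() */
#include <asm/perf_event.h>	/* x86_perf_rdpmc_index() */

/* Hypothetical helper: raw RDPMC read of an event's assigned counter. */
static u64 read_event_pmc(struct perf_event *event)
{
	unsigned long flags;
	u64 value = 0;

	local_irq_save(flags);
	/*
	 * Validate the event (and that it is active on this CPU) before
	 * trusting the counter index; both calls sit in one IRQ-off
	 * region so the assignment cannot change in between.
	 */
	if (!perf_event_read_local(event, &value, NULL, NULL))
		rdpmcl(x86_perf_rdpmc_index(event), value);
	local_irq_restore(flags);

	return value;
}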
......@@ -1584,7 +1605,7 @@ static void __init pmu_check_apic(void)
}
static struct attribute_group x86_pmu_format_group = {
static struct attribute_group x86_pmu_format_group __ro_after_init = {
.name = "format",
.attrs = NULL,
};
......@@ -1631,9 +1652,9 @@ __init struct attribute **merge_attr(struct attribute **a, struct attribute **b)
struct attribute **new;
int j, i;
for (j = 0; a[j]; j++)
for (j = 0; a && a[j]; j++)
;
for (i = 0; b[i]; i++)
for (i = 0; b && b[i]; i++)
j++;
j++;
......@@ -1642,9 +1663,9 @@ __init struct attribute **merge_attr(struct attribute **a, struct attribute **b)
return NULL;
j = 0;
for (i = 0; a[i]; i++)
for (i = 0; a && a[i]; i++)
new[j++] = a[i];
for (i = 0; b[i]; i++)
for (i = 0; b && b[i]; i++)
new[j++] = b[i];
new[j] = NULL;
......@@ -1715,7 +1736,7 @@ static struct attribute *events_attr[] = {
NULL,
};
static struct attribute_group x86_pmu_events_group = {
static struct attribute_group x86_pmu_events_group __ro_after_init = {
.name = "events",
.attrs = events_attr,
};
......@@ -2230,7 +2251,7 @@ static struct attribute *x86_pmu_attrs[] = {
NULL,
};
static struct attribute_group x86_pmu_attr_group = {
static struct attribute_group x86_pmu_attr_group __ro_after_init = {
.attrs = x86_pmu_attrs,
};
......@@ -2248,7 +2269,7 @@ static struct attribute *x86_pmu_caps_attrs[] = {
NULL
};
static struct attribute_group x86_pmu_caps_group = {
static struct attribute_group x86_pmu_caps_group __ro_after_init = {
.name = "caps",
.attrs = x86_pmu_caps_attrs,
};
......
......@@ -559,8 +559,8 @@ static const struct x86_cpu_id intel_cstates_match[] __initconst = {
X86_CSTATES_MODEL(INTEL_FAM6_HASWELL_ULT, hswult_cstates),
X86_CSTATES_MODEL(INTEL_FAM6_ATOM_SILVERMONT1, slm_cstates),
X86_CSTATES_MODEL(INTEL_FAM6_ATOM_SILVERMONT2, slm_cstates),
X86_CSTATES_MODEL(INTEL_FAM6_ATOM_SILVERMONT, slm_cstates),
X86_CSTATES_MODEL(INTEL_FAM6_ATOM_SILVERMONT_X, slm_cstates),
X86_CSTATES_MODEL(INTEL_FAM6_ATOM_AIRMONT, slm_cstates),
X86_CSTATES_MODEL(INTEL_FAM6_BROADWELL_CORE, snb_cstates),
......@@ -581,9 +581,9 @@ static const struct x86_cpu_id intel_cstates_match[] __initconst = {
X86_CSTATES_MODEL(INTEL_FAM6_XEON_PHI_KNM, knl_cstates),
X86_CSTATES_MODEL(INTEL_FAM6_ATOM_GOLDMONT, glm_cstates),
X86_CSTATES_MODEL(INTEL_FAM6_ATOM_DENVERTON, glm_cstates),
X86_CSTATES_MODEL(INTEL_FAM6_ATOM_GOLDMONT_X, glm_cstates),
X86_CSTATES_MODEL(INTEL_FAM6_ATOM_GEMINI_LAKE, glm_cstates),
X86_CSTATES_MODEL(INTEL_FAM6_ATOM_GOLDMONT_PLUS, glm_cstates),
{ },
};
MODULE_DEVICE_TABLE(x86cpu, intel_cstates_match);
......
......@@ -95,7 +95,7 @@ static ssize_t pt_cap_show(struct device *cdev,
return snprintf(buf, PAGE_SIZE, "%x\n", pt_cap_get(cap));
}
static struct attribute_group pt_cap_group = {
static struct attribute_group pt_cap_group __ro_after_init = {
.name = "caps",
};
......
......@@ -777,9 +777,9 @@ static const struct x86_cpu_id rapl_cpu_match[] __initconst = {
X86_RAPL_MODEL_MATCH(INTEL_FAM6_CANNONLAKE_MOBILE, skl_rapl_init),
X86_RAPL_MODEL_MATCH(INTEL_FAM6_ATOM_GOLDMONT, hsw_rapl_init),
X86_RAPL_MODEL_MATCH(INTEL_FAM6_ATOM_DENVERTON, hsw_rapl_init),
X86_RAPL_MODEL_MATCH(INTEL_FAM6_ATOM_GOLDMONT_X, hsw_rapl_init),
X86_RAPL_MODEL_MATCH(INTEL_FAM6_ATOM_GEMINI_LAKE, hsw_rapl_init),
X86_RAPL_MODEL_MATCH(INTEL_FAM6_ATOM_GOLDMONT_PLUS, hsw_rapl_init),
{},
};
......
......@@ -69,14 +69,14 @@ static bool test_intel(int idx)
case INTEL_FAM6_BROADWELL_GT3E:
case INTEL_FAM6_BROADWELL_X:
case INTEL_FAM6_ATOM_SILVERMONT1:
case INTEL_FAM6_ATOM_SILVERMONT2:
case INTEL_FAM6_ATOM_SILVERMONT:
case INTEL_FAM6_ATOM_SILVERMONT_X:
case INTEL_FAM6_ATOM_AIRMONT:
case INTEL_FAM6_ATOM_GOLDMONT:
case INTEL_FAM6_ATOM_DENVERTON:
case INTEL_FAM6_ATOM_GOLDMONT_X:
case INTEL_FAM6_ATOM_GEMINI_LAKE:
case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
case INTEL_FAM6_XEON_PHI_KNL:
case INTEL_FAM6_XEON_PHI_KNM:
......
......@@ -560,9 +560,11 @@ struct x86_pmu {
struct event_constraint *event_constraints;
struct x86_pmu_quirk *quirks;
int perfctr_second_write;
bool late_ack;
u64 (*limit_period)(struct perf_event *event, u64 l);
/* PMI handler bits */
unsigned int late_ack :1,
counter_freezing :1;
/*
* sysfs attrs
*/
......
......@@ -8,9 +8,6 @@
* The "_X" parts are generally the EP and EX Xeons, or the
* "Extreme" ones, like Broadwell-E.
*
* Things ending in "2" are usually because we have no better
* name for them. There's no processor called "SILVERMONT2".
*
* While adding a new CPUID for a new microarchitecture, add a new
* group to keep logically sorted out in chronological order. Within
* that group keep the CPUID for the variants sorted by model number.
......@@ -57,19 +54,23 @@
/* "Small Core" Processors (Atom) */
#define INTEL_FAM6_ATOM_PINEVIEW 0x1C
#define INTEL_FAM6_ATOM_LINCROFT 0x26
#define INTEL_FAM6_ATOM_PENWELL 0x27
#define INTEL_FAM6_ATOM_CLOVERVIEW 0x35
#define INTEL_FAM6_ATOM_CEDARVIEW 0x36
#define INTEL_FAM6_ATOM_SILVERMONT1 0x37 /* BayTrail/BYT / Valleyview */
#define INTEL_FAM6_ATOM_SILVERMONT2 0x4D /* Avaton/Rangely */
#define INTEL_FAM6_ATOM_AIRMONT 0x4C /* CherryTrail / Braswell */
#define INTEL_FAM6_ATOM_MERRIFIELD 0x4A /* Tangier */
#define INTEL_FAM6_ATOM_MOOREFIELD 0x5A /* Anniedale */
#define INTEL_FAM6_ATOM_GOLDMONT 0x5C
#define INTEL_FAM6_ATOM_DENVERTON 0x5F /* Goldmont Microserver */
#define INTEL_FAM6_ATOM_GEMINI_LAKE 0x7A
#define INTEL_FAM6_ATOM_BONNELL 0x1C /* Diamondville, Pineview */
#define INTEL_FAM6_ATOM_BONNELL_MID 0x26 /* Silverthorne, Lincroft */
#define INTEL_FAM6_ATOM_SALTWELL 0x36 /* Cedarview */
#define INTEL_FAM6_ATOM_SALTWELL_MID 0x27 /* Penwell */
#define INTEL_FAM6_ATOM_SALTWELL_TABLET 0x35 /* Cloverview */
#define INTEL_FAM6_ATOM_SILVERMONT 0x37 /* Bay Trail, Valleyview */
#define INTEL_FAM6_ATOM_SILVERMONT_X 0x4D /* Avaton, Rangely */
#define INTEL_FAM6_ATOM_SILVERMONT_MID 0x4A /* Merrifield */
#define INTEL_FAM6_ATOM_AIRMONT 0x4C /* Cherry Trail, Braswell */
#define INTEL_FAM6_ATOM_AIRMONT_MID 0x5A /* Moorefield */
#define INTEL_FAM6_ATOM_GOLDMONT 0x5C /* Apollo Lake */
#define INTEL_FAM6_ATOM_GOLDMONT_X 0x5F /* Denverton */
#define INTEL_FAM6_ATOM_GOLDMONT_PLUS 0x7A /* Gemini Lake */
/* Xeon Phi */
......
......@@ -164,6 +164,7 @@
#define DEBUGCTLMSR_BTS_OFF_OS (1UL << 9)
#define DEBUGCTLMSR_BTS_OFF_USR (1UL << 10)
#define DEBUGCTLMSR_FREEZE_LBRS_ON_PMI (1UL << 11)
#define DEBUGCTLMSR_FREEZE_PERFMON_ON_PMI (1UL << 12)
#define DEBUGCTLMSR_FREEZE_IN_SMM_BIT 14
#define DEBUGCTLMSR_FREEZE_IN_SMM (1UL << DEBUGCTLMSR_FREEZE_IN_SMM_BIT)
......
......@@ -278,6 +278,7 @@ struct perf_guest_switch_msr {
extern struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr);
extern void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap);
extern void perf_check_microcode(void);
extern int x86_perf_rdpmc_index(struct perf_event *event);
#else
static inline struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr)
{
......
......@@ -238,24 +238,52 @@ static inline int regs_within_kernel_stack(struct pt_regs *regs,
(kernel_stack_pointer(regs) & ~(THREAD_SIZE - 1)));
}
/**
* regs_get_kernel_stack_nth_addr() - get the address of the Nth entry on stack
* @regs: pt_regs which contains kernel stack pointer.
* @n: stack entry number.
*
* regs_get_kernel_stack_nth_addr() returns the address of the @n th entry of the
* kernel stack which is specified by @regs. If the @n th entry is NOT in
* the kernel stack, this returns NULL.
*/
static inline unsigned long *regs_get_kernel_stack_nth_addr(struct pt_regs *regs, unsigned int n)
{
unsigned long *addr = (unsigned long *)kernel_stack_pointer(regs);
addr += n;
if (regs_within_kernel_stack(regs, (unsigned long)addr))
return addr;
else
return NULL;
}
/* To avoid include hell, we can't include uaccess.h */
extern long probe_kernel_read(void *dst, const void *src, size_t size);
/**
* regs_get_kernel_stack_nth() - get Nth entry of the stack
* @regs: pt_regs which contains kernel stack pointer.
* @n: stack entry number.
*
* regs_get_kernel_stack_nth() returns @n th entry of the kernel stack which
* is specified by @regs. If the @n th entry is NOT in the kernel stack,
* is specified by @regs. If the @n th entry is NOT in the kernel stack
* this returns 0.
*/
static inline unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs,
unsigned int n)
{
unsigned long *addr = (unsigned long *)kernel_stack_pointer(regs);
addr += n;
if (regs_within_kernel_stack(regs, (unsigned long)addr))
return *addr;
else
return 0;
unsigned long *addr;
unsigned long val;
long ret;
addr = regs_get_kernel_stack_nth_addr(regs, n);
if (addr) {
ret = probe_kernel_read(&val, addr, sizeof(val));
if (!ret)
return val;
}
return 0;
}
#define arch_has_single_step() (1)
......
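As a usage illustration for the regs_get_kernel_stack_nth() change above (hypothetical module code, not part of this diff; stack_peek_pre_handler() is a made-up name): a kprobe pre-handler can now read a stack slot and simply get 0 back, instead of faulting, when the probed context has a bad or too-short stack.

#include <linux/kprobes.h>
#include <linux/printk.h>
#include <asm/ptrace.h>

/* Hypothetical pre-handler: dump the 4th word on the probed stack. */
static int stack_peek_pre_handler(struct kprobe *p, struct pt_regs *regs)
{
	/* Returns 0 if entry 4 is outside (or unreadable on) the stack. */
	unsigned long entry = regs_get_kernel_stack_nth(regs, 4);

	pr_info("probe at %pS: stack[4] = 0x%lx\n", p->addr, entry);
	return 0;	/* let the probed instruction run normally */
}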
......@@ -949,11 +949,11 @@ static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c)
}
static const __initconst struct x86_cpu_id cpu_no_speculation[] = {
{ X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_CEDARVIEW, X86_FEATURE_ANY },
{ X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_CLOVERVIEW, X86_FEATURE_ANY },
{ X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_LINCROFT, X86_FEATURE_ANY },
{ X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_PENWELL, X86_FEATURE_ANY },
{ X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_PINEVIEW, X86_FEATURE_ANY },
{ X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SALTWELL, X86_FEATURE_ANY },
{ X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SALTWELL_TABLET, X86_FEATURE_ANY },
{ X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_BONNELL_MID, X86_FEATURE_ANY },
{ X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SALTWELL_MID, X86_FEATURE_ANY },
{ X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_BONNELL, X86_FEATURE_ANY },
{ X86_VENDOR_CENTAUR, 5 },
{ X86_VENDOR_INTEL, 5 },
{ X86_VENDOR_NSC, 5 },
......@@ -968,10 +968,10 @@ static const __initconst struct x86_cpu_id cpu_no_meltdown[] = {
/* Only list CPUs which speculate but are non susceptible to SSB */
static const __initconst struct x86_cpu_id cpu_no_spec_store_bypass[] = {
{ X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT1 },
{ X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT },
{ X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_AIRMONT },
{ X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT2 },
{ X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_MERRIFIELD },
{ X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT_X },
{ X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT_MID },
{ X86_VENDOR_INTEL, 6, INTEL_FAM6_CORE_YONAH },
{ X86_VENDOR_INTEL, 6, INTEL_FAM6_XEON_PHI_KNL },
{ X86_VENDOR_INTEL, 6, INTEL_FAM6_XEON_PHI_KNM },
......@@ -984,14 +984,14 @@ static const __initconst struct x86_cpu_id cpu_no_spec_store_bypass[] = {
static const __initconst struct x86_cpu_id cpu_no_l1tf[] = {
/* in addition to cpu_no_speculation */
{ X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT1 },
{ X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT2 },
{ X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT },
{ X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT_X },
{ X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_AIRMONT },
{ X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_MERRIFIELD },
{ X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_MOOREFIELD },
{ X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT_MID },
{ X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_AIRMONT_MID },
{ X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_GOLDMONT },
{ X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_DENVERTON },
{ X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_GEMINI_LAKE },
{ X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_GOLDMONT_X },
{ X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_GOLDMONT_PLUS },
{ X86_VENDOR_INTEL, 6, INTEL_FAM6_XEON_PHI_KNL },
{ X86_VENDOR_INTEL, 6, INTEL_FAM6_XEON_PHI_KNM },
{}
......
......@@ -485,9 +485,7 @@ static int domain_setup_mon_state(struct rdt_resource *r, struct rdt_domain *d)
size_t tsize;
if (is_llc_occupancy_enabled()) {
d->rmid_busy_llc = kcalloc(BITS_TO_LONGS(r->num_rmid),
sizeof(unsigned long),
GFP_KERNEL);
d->rmid_busy_llc = bitmap_zalloc(r->num_rmid, GFP_KERNEL);
if (!d->rmid_busy_llc)
return -ENOMEM;
INIT_DELAYED_WORK(&d->cqm_limbo, cqm_handle_limbo);
......@@ -496,7 +494,7 @@ static int domain_setup_mon_state(struct rdt_resource *r, struct rdt_domain *d)
tsize = sizeof(*d->mbm_total);
d->mbm_total = kcalloc(r->num_rmid, tsize, GFP_KERNEL);
if (!d->mbm_total) {
kfree(d->rmid_busy_llc);
bitmap_free(d->rmid_busy_llc);
return -ENOMEM;
}
}
......@@ -504,7 +502,7 @@ static int domain_setup_mon_state(struct rdt_resource *r, struct rdt_domain *d)
tsize = sizeof(*d->mbm_local);
d->mbm_local = kcalloc(r->num_rmid, tsize, GFP_KERNEL);
if (!d->mbm_local) {
kfree(d->rmid_busy_llc);
bitmap_free(d->rmid_busy_llc);
kfree(d->mbm_total);
return -ENOMEM;
}
......@@ -610,9 +608,16 @@ static void domain_remove_cpu(int cpu, struct rdt_resource *r)
cancel_delayed_work(&d->cqm_limbo);
}
/*
* rdt_domain "d" is going to be freed below, so clear
* its pointer from pseudo_lock_region struct.
*/
if (d->plr)
d->plr->d = NULL;
kfree(d->ctrl_val);
kfree(d->mbps_val);
kfree(d->rmid_busy_llc);
bitmap_free(d->rmid_busy_llc);
kfree(d->mbm_total);
kfree(d->mbm_local);
kfree(d);
......
......@@ -404,8 +404,16 @@ int rdtgroup_schemata_show(struct kernfs_open_file *of,
for_each_alloc_enabled_rdt_resource(r)
seq_printf(s, "%s:uninitialized\n", r->name);
} else if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) {
seq_printf(s, "%s:%d=%x\n", rdtgrp->plr->r->name,
rdtgrp->plr->d->id, rdtgrp->plr->cbm);
if (!rdtgrp->plr->d) {
rdt_last_cmd_clear();
rdt_last_cmd_puts("Cache domain offline\n");
ret = -ENODEV;
} else {
seq_printf(s, "%s:%d=%x\n",
rdtgrp->plr->r->name,
rdtgrp->plr->d->id,
rdtgrp->plr->cbm);
}
} else {
closid = rdtgrp->closid;
for_each_alloc_enabled_rdt_resource(r) {
......
......@@ -268,17 +268,27 @@ static int rdtgroup_cpus_show(struct kernfs_open_file *of,
struct seq_file *s, void *v)
{
struct rdtgroup *rdtgrp;
struct cpumask *mask;
int ret = 0;
rdtgrp = rdtgroup_kn_lock_live(of->kn);
if (rdtgrp) {
if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED)
seq_printf(s, is_cpu_list(of) ? "%*pbl\n" : "%*pb\n",
cpumask_pr_args(&rdtgrp->plr->d->cpu_mask));
else
if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) {
if (!rdtgrp->plr->d) {
rdt_last_cmd_clear();
rdt_last_cmd_puts("Cache domain offline\n");
ret = -ENODEV;
} else {
mask = &rdtgrp->plr->d->cpu_mask;
seq_printf(s, is_cpu_list(of) ?
"%*pbl\n" : "%*pb\n",
cpumask_pr_args(mask));
}
} else {
seq_printf(s, is_cpu_list(of) ? "%*pbl\n" : "%*pb\n",
cpumask_pr_args(&rdtgrp->cpu_mask));
}
} else {
ret = -ENOENT;
}
......@@ -961,7 +971,78 @@ static int rdtgroup_mode_show(struct kernfs_open_file *of,
}
/**
* rdtgroup_cbm_overlaps - Does CBM for intended closid overlap with other
* rdt_cdp_peer_get - Retrieve CDP peer if it exists
* @r: RDT resource to which RDT domain @d belongs
* @d: Cache instance for which a CDP peer is requested
* @r_cdp: RDT resource that shares hardware with @r (RDT resource peer)
* Used to return the result.
* @d_cdp: RDT domain that shares hardware with @d (RDT domain peer)
* Used to return the result.
*
* RDT resources are managed independently and by extension the RDT domains
* (RDT resource instances) are managed independently also. The Code and
* Data Prioritization (CDP) RDT resources, while managed independently,
* could refer to the same underlying hardware. For example,
* RDT_RESOURCE_L2CODE and RDT_RESOURCE_L2DATA both refer to the L2 cache.
*
* When provided with an RDT resource @r and an instance of that RDT
* resource @d, rdt_cdp_peer_get() will return whether there is a peer RDT
* resource and, if so, the exact instance that shares the same hardware.
*
* Return: 0 if a CDP peer was found, <0 on error or if no CDP peer exists.
* If a CDP peer was found, @r_cdp will point to the peer RDT resource
* and @d_cdp will point to the peer RDT domain.
*/
static int rdt_cdp_peer_get(struct rdt_resource *r, struct rdt_domain *d,
struct rdt_resource **r_cdp,
struct rdt_domain **d_cdp)
{
struct rdt_resource *_r_cdp = NULL;
struct rdt_domain *_d_cdp = NULL;
int ret = 0;
switch (r->rid) {
case RDT_RESOURCE_L3DATA:
_r_cdp = &rdt_resources_all[RDT_RESOURCE_L3CODE];
break;
case RDT_RESOURCE_L3CODE:
_r_cdp = &rdt_resources_all[RDT_RESOURCE_L3DATA];
break;
case RDT_RESOURCE_L2DATA:
_r_cdp = &rdt_resources_all[RDT_RESOURCE_L2CODE];
break;
case RDT_RESOURCE_L2CODE:
_r_cdp = &rdt_resources_all[RDT_RESOURCE_L2DATA];
break;
default:
ret = -ENOENT;
goto out;
}
/*
* When a new CPU comes online and CDP is enabled then the new
* RDT domains (if any) associated with both CDP RDT resources
* are added in the same CPU online routine while the
* rdtgroup_mutex is held. It should thus not happen that one
* RDT domain exists and is associated with its RDT CDP
* resource while there is no RDT domain associated with the
* peer RDT CDP resource. Hence the WARN.
*/
_d_cdp = rdt_find_domain(_r_cdp, d->id, NULL);
if (WARN_ON(!_d_cdp)) {
_r_cdp = NULL;
ret = -EINVAL;
}
out:
*r_cdp = _r_cdp;
*d_cdp = _d_cdp;
return ret;
}
/**
* __rdtgroup_cbm_overlaps - Does CBM for intended closid overlap with other
* @r: Resource to which domain instance @d belongs.
* @d: The domain instance for which @closid is being tested.
* @cbm: Capacity bitmask being tested.
......@@ -980,8 +1061,8 @@ static int rdtgroup_mode_show(struct kernfs_open_file *of,
*
* Return: false if CBM does not overlap, true if it does.
*/
bool rdtgroup_cbm_overlaps(struct rdt_resource *r, struct rdt_domain *d,
unsigned long cbm, int closid, bool exclusive)
static bool __rdtgroup_cbm_overlaps(struct rdt_resource *r, struct rdt_domain *d,
unsigned long cbm, int closid, bool exclusive)
{
enum rdtgrp_mode mode;
unsigned long ctrl_b;
......@@ -1016,6 +1097,41 @@ bool rdtgroup_cbm_overlaps(struct rdt_resource *r, struct rdt_domain *d,
return false;
}
/**
* rdtgroup_cbm_overlaps - Does CBM overlap with other use of hardware
* @r: Resource to which domain instance @d belongs.
* @d: The domain instance for which @closid is being tested.
* @cbm: Capacity bitmask being tested.
* @closid: Intended closid for @cbm.
* @exclusive: Only check if overlaps with exclusive resource groups
*
* Resources that can be allocated using a CBM can use the CBM to control
* the overlap of these allocations. rdtgroup_cbm_overlaps() is the test
* for overlap. The overlap test is not limited to the specific resource for
* which the CBM is intended though - when dealing with CDP resources that
* share the underlying hardware the overlap check should be performed on
* the CDP resource sharing the hardware also.
*
* Refer to description of __rdtgroup_cbm_overlaps() for the details of the
* overlap test.
*
* Return: true if CBM overlap detected, false if there is no overlap
*/
bool rdtgroup_cbm_overlaps(struct rdt_resource *r, struct rdt_domain *d,
unsigned long cbm, int closid, bool exclusive)
{
struct rdt_resource *r_cdp;
struct rdt_domain *d_cdp;
if (__rdtgroup_cbm_overlaps(r, d, cbm, closid, exclusive))
return true;
if (rdt_cdp_peer_get(r, d, &r_cdp, &d_cdp) < 0)
return false;
return __rdtgroup_cbm_overlaps(r_cdp, d_cdp, cbm, closid, exclusive);
}
/**
* rdtgroup_mode_test_exclusive - Test if this resource group can be exclusive
*
......@@ -1176,6 +1292,7 @@ static int rdtgroup_size_show(struct kernfs_open_file *of,
struct rdt_resource *r;
struct rdt_domain *d;
unsigned int size;
int ret = 0;
bool sep;
u32 ctrl;
......@@ -1186,11 +1303,18 @@ static int rdtgroup_size_show(struct kernfs_open_file *of,
}
if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) {
seq_printf(s, "%*s:", max_name_width, rdtgrp->plr->r->name);
size = rdtgroup_cbm_to_size(rdtgrp->plr->r,
rdtgrp->plr->d,
rdtgrp->plr->cbm);
seq_printf(s, "%d=%u\n", rdtgrp->plr->d->id, size);
if (!rdtgrp->plr->d) {
rdt_last_cmd_clear();
rdt_last_cmd_puts("Cache domain offline\n");
ret = -ENODEV;
} else {
seq_printf(s, "%*s:", max_name_width,
rdtgrp->plr->r->name);
size = rdtgroup_cbm_to_size(rdtgrp->plr->r,
rdtgrp->plr->d,
rdtgrp->plr->cbm);
seq_printf(s, "%d=%u\n", rdtgrp->plr->d->id, size);
}
goto out;
}
......@@ -1220,7 +1344,7 @@ static int rdtgroup_size_show(struct kernfs_open_file *of,
out:
rdtgroup_kn_unlock(of->kn);
return 0;
return ret;
}
/* rdtgroup information files for one cache resource. */
......@@ -2354,14 +2478,16 @@ static void cbm_ensure_valid(u32 *_val, struct rdt_resource *r)
*/
static int rdtgroup_init_alloc(struct rdtgroup *rdtgrp)
{
struct rdt_resource *r_cdp = NULL;
struct rdt_domain *d_cdp = NULL;
u32 used_b = 0, unused_b = 0;
u32 closid = rdtgrp->closid;