Commit 93a3243f authored by Antonio Caggiano 🦀
Browse files

Skip short records

parent 8b41a1ee
Pipeline #287255 passed with stages
in 2 minutes and 14 seconds
......@@ -32,6 +32,7 @@ public:
~IntelDriver() override;
std::optional<intel_perf_record_timestamp_correlation> query_correlation_timestamps() const;
void get_new_correlation();
/// @brief OA reports only have the lower 32 bits of the timestamp
/// register, while correlation data has the whole 36 bits.
......
perfetto @ 46e5c3c3
Subproject commit 46e5c3c3c756c7a5e1ba82b2079a8621af635f7b
......@@ -10,11 +10,11 @@
#include <dirent.h>
#include <fcntl.h>
#include <math.h>
#include <poll.h>
#include <strings.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <math.h>
#include <i915_drm.h>
#include <perf_data_reader.h>
......@@ -37,9 +37,11 @@ uint64_t IntelDriver::get_min_sampling_period_ns()
IntelDriver::IntelDriver(IntelDriver &&other)
: Driver {std::move(other)}
, sampling_period_ns {other.sampling_period_ns}
, timestamp_frequency {other.timestamp_frequency}
, perf {other.perf}
, accu {other.accu}
, last_cpu_timestamp {other.last_cpu_timestamp}
, correlations {std::move(other.correlations)}
, metric_buffer {std::move(other.metric_buffer)}
, total_bytes_read {other.total_bytes_read}
......@@ -59,9 +61,11 @@ IntelDriver &IntelDriver::operator=(IntelDriver &&other)
}
Driver::operator=(std::move(other));
std::swap(sampling_period_ns, other.sampling_period_ns);
std::swap(timestamp_frequency, other.timestamp_frequency);
std::swap(perf, other.perf);
std::swap(accu, other.accu);
std::swap(last_cpu_timestamp, other.last_cpu_timestamp);
std::swap(correlations, other.correlations);
std::swap(metric_buffer, other.metric_buffer);
std::swap(total_bytes_read, other.total_bytes_read);
......@@ -106,11 +110,12 @@ void IntelDriver::enable_counter(uint32_t counter_id)
auto &group = groups[counter.group];
if (metric_set != nullptr) {
if (metric_set->symbol_name != group.name) {
PPS_LOG_ERROR("Unable to enable metrics from different sets: %u "
"belongs to %s but %s is currently in use.",
counter_id,
metric_set->symbol_name,
group.name.c_str());
PPS_LOG_ERROR(
"Unable to enable metrics from different sets: %u "
"belongs to %s but %s is currently in use.",
counter_id,
metric_set->symbol_name,
group.name.c_str());
return;
}
}
......@@ -215,6 +220,19 @@ static uint64_t query_timestamp_frequency(const DrmDevice &drm_device)
return 12000000;
}
/// @brief Drops the oldest correlation and stores a freshly queried one as the newest.
/// Logs a fatal error when no correlation timestamps could be queried.
void IntelDriver::get_new_correlation()
{
    // Shift every entry one slot towards the front; the previous front entry
    // ends up in the last slot, which is the one that gets replaced below
    std::rotate(correlations.begin(), correlations.begin() + 1, correlations.end());

    const auto latest = query_correlation_timestamps();
    if (!latest) {
        PPS_LOG_FATAL("Failed to get correlation timestamps");
        return;
    }

    // The last slot now holds stale data, so replace it with the new correlation
    correlations.back() = *latest;
}
bool IntelDriver::init_perfcnt()
{
// Initialize intel Perf
......@@ -228,7 +246,8 @@ bool IntelDriver::init_perfcnt()
// Find groups and counters
intel_perf_metric_set *metric_set = nullptr;
igt_list_for_each_entry(metric_set, &perf->metric_sets, link) {
igt_list_for_each_entry(metric_set, &perf->metric_sets, link)
{
// Create group
CounterGroup group = {};
group.id = groups.size();
......@@ -270,7 +289,10 @@ bool IntelDriver::init_perfcnt()
counters.emplace_back(std::move(counter_desc));
}
PPS_LOG("Metric set %s: [%lu:%lu]", metric_set->symbol_name, first_counter_id, counters.size()-1);
PPS_LOG("Metric set %s: [%lu:%lu]",
metric_set->symbol_name,
first_counter_id,
counters.size() - 1);
// Store group
groups.emplace_back(std::move(group));
}
......@@ -303,7 +325,8 @@ int IntelDriver::perf_open(const intel_perf_metric_set &metric_set,
// The period_exponent gives a sampling period as follows:
// sample_period = timestamp_period * 2^(period_exponent + 1)
// where timestamp_period is 80ns for Haswell+
auto oa_exponent = (uint32_t) log2(sampling_period_ns * timestamp_frequency / 1000000000ull) - 1;
auto oa_exponent = (uint32_t)log2(sampling_period_ns * timestamp_frequency / 1000000000ull) - 1;
PPS_LOG("OA exp %u", oa_exponent);
properties[p++] = DRM_I915_PERF_PROP_OA_EXPONENT;
properties[p++] = oa_exponent;
......@@ -372,6 +395,20 @@ void IntelDriver::disable_perfcnt()
metric_fd = -1;
}
/// @brief Leading fields of a report.
/// In this file a `Report` pointer is obtained by reinterpreting the bytes that
/// immediately follow a `drm_i915_perf_record_header`, so the order and width
/// of these members must not change.
struct Report {
    // NOTE(review): meaning of this field is not visible from here — TODO confirm
    uint32_t version;
    /// GPU timestamp of the report; this is the value handed to correlate_gpu_timestamp()
    uint32_t timestamp;
    // NOTE(review): meaning of this field is not visible from here — TODO confirm
    uint32_t id;
};
namespace
{
/// @brief Checks whether a measured duration is close enough to the sampling period.
/// @param duration Measured duration to check
/// @param sampling_period Requested sampling period, in the same unit as `duration`
/// @return True when `duration` is strictly greater than the sampling period
/// minus one sixteenth of it (integer division)
bool close_enough(uint64_t duration, uint64_t sampling_period)
{
    // Accept durations that fall short of the period by less than 1/16th of it
    const uint64_t tolerance = sampling_period / 16;
    const uint64_t threshold = sampling_period - tolerance;
    return threshold < duration;
}
} // namespace
/// @brief Transforms the raw data received in from the driver into records
std::vector<PerfRecord> IntelDriver::parse_perf_records(const std::vector<uint8_t> &data,
const size_t byte_count)
......@@ -385,15 +422,26 @@ std::vector<PerfRecord> IntelDriver::parse_perf_records(const std::vector<uint8_
const uint8_t *iter = data.data();
const uint8_t *end = iter + byte_count;
uint64_t prev_cpu_timestamp = last_cpu_timestamp;
while (iter < end) {
// Iterate a record at a time
auto header = reinterpret_cast<const drm_i915_perf_record_header *>(iter);
if (header->type == DRM_I915_PERF_RECORD_SAMPLE) {
// Add the new record to the list
record.resize(header->size); // Possibly 264?
memcpy(record.data(), iter, header->size);
records.emplace_back(record);
// Report is next to the header
auto report = reinterpret_cast<const Report *>(header + 1);
auto cpu_timestamp = correlate_gpu_timestamp(report->timestamp);
auto duration = cpu_timestamp - prev_cpu_timestamp;
// Check distance between last report and this one is good enough
if (close_enough(duration, sampling_period_ns)) {
prev_cpu_timestamp = cpu_timestamp;
// Add the new record to the list
record.resize(header->size); // Possibly 264?
memcpy(record.data(), iter, header->size);
records.emplace_back(record);
}
}
// Go to the next record
......@@ -438,6 +486,9 @@ bool IntelDriver::dump_perfcnt()
}
read_data_from_metric_set();
get_new_correlation();
auto new_records = parse_perf_records(metric_buffer, total_bytes_read);
if (new_records.empty()) {
// No new records from the GPU yet
......@@ -455,26 +506,9 @@ bool IntelDriver::dump_perfcnt()
return false;
}
// Rotate left correlations by one position so to make space at the end
std::rotate(correlations.begin(), correlations.begin() + 1, correlations.end());
// Then we overwrite the last correlation with a new one
if (auto corr = query_correlation_timestamps()) {
correlations.back() = *corr;
} else {
PPS_LOG_FATAL("Failed to get correlation timestamps");
return false;
}
return true;
}
/// @brief Leading fields of a report.
/// In this file a `Report` pointer is obtained by reinterpreting the bytes that
/// immediately follow a `drm_i915_perf_record_header`, so the order and width
/// of these members must not change.
struct Report {
    // NOTE(review): meaning of this field is not visible from here — TODO confirm
    uint32_t version;
    /// GPU timestamp of the report; this is the value handed to correlate_gpu_timestamp()
    uint32_t timestamp;
    // NOTE(review): meaning of this field is not visible from here — TODO confirm
    uint32_t id;
};
/// @brief Adds accumulation src to dst
static void add(const intel_perf_accumulator &src, intel_perf_accumulator &dest)
{
......@@ -491,9 +525,9 @@ uint32_t IntelDriver::gpu_next()
return 0;
}
// Get first and last
// Get first and second
auto record_a = reinterpret_cast<const drm_i915_perf_record_header *>(records[0].data());
auto record_b = reinterpret_cast<const drm_i915_perf_record_header *>(records[records.size()-1].data());
auto record_b = reinterpret_cast<const drm_i915_perf_record_header *>(records[1].data());
intel_perf_accumulator temp_accumulator;
intel_perf_accumulate_reports(
......@@ -504,8 +538,8 @@ uint32_t IntelDriver::gpu_next()
auto report_b = reinterpret_cast<const Report *>(record_b + 1);
auto gpu_timestamp = report_b->timestamp;
// Consume the records (but keep the last one as reference for next computation)
records.erase(std::begin(records), std::end(records) - 1);
// Consume first record
records.erase(std::begin(records), std::begin(records) + 1);
return gpu_timestamp;
}
......@@ -515,13 +549,6 @@ uint64_t IntelDriver::cpu_next()
if (auto gpu_timestamp = gpu_next()) {
auto cpu_timestamp = correlate_gpu_timestamp(gpu_timestamp);
// Make sure this report spans the requested sampling period
auto duration = cpu_timestamp - last_cpu_timestamp;
if (duration < sampling_period_ns) {
// This sample duration is too short, so we accumulate the next one as well
return cpu_next();
}
last_cpu_timestamp = cpu_timestamp;
return cpu_timestamp;
}
......
......@@ -17,7 +17,7 @@
namespace pps::gpu
{
std::optional<IntelDriver> init_concrete_driver()
std::optional<IntelDriver> init_concrete_driver(uint64_t sampling_period_ns = 1000000)
{
auto drm_device_opt = DrmDevice::create(0);
if (!drm_device_opt) {
......@@ -30,7 +30,6 @@ std::optional<IntelDriver> init_concrete_driver()
driver.init_perfcnt();
driver.enable_all_counters();
constexpr uint64_t sampling_period_ns = 1000000;
driver.enable_perfcnt(sampling_period_ns);
return driver;
......@@ -40,7 +39,8 @@ std::optional<IntelDriver> init_concrete_driver()
/// CPU timestamps and that the timing order is correct and preserved
TEST(Intel, GpuTimestamps)
{
auto driver = init_concrete_driver();
constexpr uint64_t sampling_period_ns = 1000000;
auto driver = init_concrete_driver(sampling_period_ns);
if (!driver) {
GTEST_SKIP_("Failed to initialize concrete driver");
}
......@@ -68,6 +68,34 @@ TEST(Intel, GpuTimestamps)
}
}
/// @brief Logs the interval between consecutive CPU timestamps returned by the
/// driver and expects every successful dump to yield at least one sample
TEST(Intel, TimestampIntervals)
{
    constexpr uint64_t sampling_period_ns = 1000000;
    constexpr uint32_t round_count = 32;

    auto driver = init_concrete_driver(sampling_period_ns);
    if (!driver) {
        GTEST_SKIP_("Failed to initialize concrete driver");
    }

    uint64_t previous_timestamp = 0;

    for (uint32_t round = 0; round != round_count; ++round) {
        // Keep dumping until the driver reports that records are available
        while (!driver->dump_perfcnt()) {
        }

        uint32_t samples_seen = 0;
        for (auto timestamp = driver->cpu_next(); timestamp; timestamp = driver->cpu_next()) {
            // Intervals are only logged from the second round onwards
            const bool past_first_round = round > 0;
            if (past_first_round) {
                PPS_LOG("%lu", timestamp - previous_timestamp);
            }

            previous_timestamp = timestamp;
            ++samples_seen;
        }

        EXPECT_GT(samples_seen, 0);
    }
}
/// @brief Make sure correlations timestamps are valid and that CPU timestamps
/// calculated by the driver fall between first and last correlation
TEST(Intel, Correlations)
......@@ -94,34 +122,6 @@ TEST(Intel, Correlations)
driver->disable_perfcnt();
}
/// @brief Reads the whole content of a binary file into memory.
/// Failures are reported through non-fatal gtest expectations.
/// @param path Path of the file to read
/// @return The bytes of the file, or an empty vector when the file can not be
/// opened or its size can not be determined as a positive value
std::vector<uint8_t> read_data_from_file(const char *path)
{
    std::vector<uint8_t> data = {};

    auto file = std::fopen(path, "rb");
    EXPECT_TRUE(file != nullptr);
    if (file == nullptr) {
        // EXPECT_* does not abort the test, so bail out here instead of
        // handing a null stream to fseek/ftell/fread
        return data;
    }

    std::fseek(file, 0, SEEK_END);
    // ftell returns a long and -1 on failure: keep it signed until validated,
    // otherwise the error value would turn into a huge size
    const long end_position = std::ftell(file);
    EXPECT_GT(end_position, 0);
    std::fseek(file, 0, SEEK_SET);

    if (end_position > 0) {
        const auto file_size = static_cast<size_t>(end_position);
        data.resize(file_size);
        size_t bytes_read = std::fread(data.data(), sizeof(data[0]), file_size, file);
        EXPECT_EQ(bytes_read, file_size);
    }

    // Release the stream on every path that managed to open it
    std::fclose(file);

    return data;
}
/// @brief Parsing a captured dump of raw perf data should yield at least one record
TEST(Intel, PerfCnt)
{
    // A default-constructed driver: there is no DRM device behind it
    IntelDriver driver {};

    const auto dump = read_data_from_file("test/data/gpu/intel/intel.dump");
    const auto parsed_records = driver.parse_perf_records(dump, dump.size());

    EXPECT_GT(parsed_records.size(), 0);
}
} // namespace pps::gpu
int main(int argc, char **argv)
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment