Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
Antonio Caggiano
gfx-pps
Commits
93a3243f
Commit
93a3243f
authored
Mar 19, 2021
by
Antonio Caggiano
🦀
Browse files
Skip short records
parent
8b41a1ee
Pipeline
#287255
passed with stages
in 2 minutes and 14 seconds
Changes
4
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
include/pps/gpu/intel/intel_driver.h
View file @
93a3243f
...
...
@@ -32,6 +32,7 @@ public:
~
IntelDriver
()
override
;
std
::
optional
<
intel_perf_record_timestamp_correlation
>
query_correlation_timestamps
()
const
;
void
get_new_correlation
();
/// @brief OA reports only have the lower 32 bits of the timestamp
/// register, while correlation data has the whole 36 bits.
...
...
perfetto
@
46e5c3c3
Subproject commit 46e5c3c3c756c7a5e1ba82b2079a8621af635f7b
src/gpu/intel/intel_driver.cc
View file @
93a3243f
...
...
@@ -10,11 +10,11 @@
#include <dirent.h>
#include <fcntl.h>
#include <math.h>
#include <poll.h>
#include <strings.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <math.h>
#include <i915_drm.h>
#include <perf_data_reader.h>
...
...
@@ -37,9 +37,11 @@ uint64_t IntelDriver::get_min_sampling_period_ns()
IntelDriver
::
IntelDriver
(
IntelDriver
&&
other
)
:
Driver
{
std
::
move
(
other
)}
,
sampling_period_ns
{
other
.
sampling_period_ns
}
,
timestamp_frequency
{
other
.
timestamp_frequency
}
,
perf
{
other
.
perf
}
,
accu
{
other
.
accu
}
,
last_cpu_timestamp
{
other
.
last_cpu_timestamp
}
,
correlations
{
std
::
move
(
other
.
correlations
)}
,
metric_buffer
{
std
::
move
(
other
.
metric_buffer
)}
,
total_bytes_read
{
other
.
total_bytes_read
}
...
...
@@ -59,9 +61,11 @@ IntelDriver &IntelDriver::operator=(IntelDriver &&other)
}
Driver
::
operator
=
(
std
::
move
(
other
));
std
::
swap
(
sampling_period_ns
,
other
.
sampling_period_ns
);
std
::
swap
(
timestamp_frequency
,
other
.
timestamp_frequency
);
std
::
swap
(
perf
,
other
.
perf
);
std
::
swap
(
accu
,
other
.
accu
);
std
::
swap
(
last_cpu_timestamp
,
other
.
last_cpu_timestamp
);
std
::
swap
(
correlations
,
other
.
correlations
);
std
::
swap
(
metric_buffer
,
other
.
metric_buffer
);
std
::
swap
(
total_bytes_read
,
other
.
total_bytes_read
);
...
...
@@ -106,11 +110,12 @@ void IntelDriver::enable_counter(uint32_t counter_id)
auto
&
group
=
groups
[
counter
.
group
];
if
(
metric_set
!=
nullptr
)
{
if
(
metric_set
->
symbol_name
!=
group
.
name
)
{
PPS_LOG_ERROR
(
"Unable to enable metrics from different sets: %u "
"belongs to %s but %s is currently in use."
,
counter_id
,
metric_set
->
symbol_name
,
group
.
name
.
c_str
());
PPS_LOG_ERROR
(
"Unable to enable metrics from different sets: %u "
"belongs to %s but %s is currently in use."
,
counter_id
,
metric_set
->
symbol_name
,
group
.
name
.
c_str
());
return
;
}
}
...
...
@@ -215,6 +220,19 @@ static uint64_t query_timestamp_frequency(const DrmDevice &drm_device)
return
12000000
;
}
/// @brief Refreshes the set of stored correlations with a newly queried one.
///
/// The stored correlations are rotated one position to the left, which moves
/// the first entry to the back; that last slot is then overwritten with the
/// result of query_correlation_timestamps(). When the query yields no value,
/// a fatal message is logged and the rotated contents are left as they are.
void IntelDriver::get_new_correlation()
{
    // Shift everything one slot towards the front; the former first entry
    // ends up at the back, where it is about to be replaced.
    std::rotate(correlations.begin(), correlations.begin() + 1, correlations.end());

    auto fresh = query_correlation_timestamps();
    if (!fresh) {
        PPS_LOG_FATAL("Failed to get correlation timestamps");
        return;
    }

    // Replace the entry that was rotated to the back with the new correlation
    correlations.back() = *fresh;
}
bool
IntelDriver
::
init_perfcnt
()
{
// Initialize intel Perf
...
...
@@ -228,7 +246,8 @@ bool IntelDriver::init_perfcnt()
// Find groups and counters
intel_perf_metric_set
*
metric_set
=
nullptr
;
igt_list_for_each_entry
(
metric_set
,
&
perf
->
metric_sets
,
link
)
{
igt_list_for_each_entry
(
metric_set
,
&
perf
->
metric_sets
,
link
)
{
// Create group
CounterGroup
group
=
{};
group
.
id
=
groups
.
size
();
...
...
@@ -270,7 +289,10 @@ bool IntelDriver::init_perfcnt()
counters
.
emplace_back
(
std
::
move
(
counter_desc
));
}
PPS_LOG
(
"Metric set %s: [%lu:%lu]"
,
metric_set
->
symbol_name
,
first_counter_id
,
counters
.
size
()
-
1
);
PPS_LOG
(
"Metric set %s: [%lu:%lu]"
,
metric_set
->
symbol_name
,
first_counter_id
,
counters
.
size
()
-
1
);
// Store group
groups
.
emplace_back
(
std
::
move
(
group
));
}
...
...
@@ -303,7 +325,8 @@ int IntelDriver::perf_open(const intel_perf_metric_set &metric_set,
// The period_exponent gives a sampling period as follows:
// sample_period = timestamp_period * 2^(period_exponent + 1)
// where timestamp_period is 80ns for Haswell+
auto
oa_exponent
=
(
uint32_t
)
log2
(
sampling_period_ns
*
timestamp_frequency
/
1000000000ull
)
-
1
;
auto
oa_exponent
=
(
uint32_t
)
log2
(
sampling_period_ns
*
timestamp_frequency
/
1000000000ull
)
-
1
;
PPS_LOG
(
"OA exp %u"
,
oa_exponent
);
properties
[
p
++
]
=
DRM_I915_PERF_PROP_OA_EXPONENT
;
properties
[
p
++
]
=
oa_exponent
;
...
...
@@ -372,6 +395,20 @@ void IntelDriver::disable_perfcnt()
metric_fd
=
-
1
;
}
/// @brief Leading fields of the report that follows a perf record header.
///
/// Records are reinterpreted as this struct to read the GPU timestamp, so the
/// order and width of the three fields must not change.
struct Report {
    /// NOTE(review): exact meaning not visible here — presumably a format/version word; confirm.
    uint32_t version;
    /// GPU timestamp of the report; it is the value handed to
    /// correlate_gpu_timestamp() to obtain a CPU timestamp.
    uint32_t timestamp;
    /// NOTE(review): exact meaning not visible here — presumably a report/context id; confirm.
    uint32_t id;
};
namespace
{
/// @brief Tells whether a measured duration is long enough to count as a full sample.
/// @param duration Time elapsed between two reports
/// @param sampling_period Requested sampling period, in the same unit as duration
/// @return True when duration is strictly greater than the sampling period
/// reduced by one sixteenth of itself (integer division)
bool close_enough(uint64_t duration, uint64_t sampling_period)
{
    const uint64_t tolerance = sampling_period / 16;
    const uint64_t shortest_accepted = sampling_period - tolerance;
    return shortest_accepted < duration;
}
} // namespace
/// @brief Transforms the raw data received from the driver into records
std
::
vector
<
PerfRecord
>
IntelDriver
::
parse_perf_records
(
const
std
::
vector
<
uint8_t
>
&
data
,
const
size_t
byte_count
)
...
...
@@ -385,15 +422,26 @@ std::vector<PerfRecord> IntelDriver::parse_perf_records(const std::vector<uint8_
const
uint8_t
*
iter
=
data
.
data
();
const
uint8_t
*
end
=
iter
+
byte_count
;
uint64_t
prev_cpu_timestamp
=
last_cpu_timestamp
;
while
(
iter
<
end
)
{
// Iterate a record at a time
auto
header
=
reinterpret_cast
<
const
drm_i915_perf_record_header
*>
(
iter
);
if
(
header
->
type
==
DRM_I915_PERF_RECORD_SAMPLE
)
{
// Add the new record to the list
record
.
resize
(
header
->
size
);
// Possibly 264?
memcpy
(
record
.
data
(),
iter
,
header
->
size
);
records
.
emplace_back
(
record
);
// Report is next to the header
auto
report
=
reinterpret_cast
<
const
Report
*>
(
header
+
1
);
auto
cpu_timestamp
=
correlate_gpu_timestamp
(
report
->
timestamp
);
auto
duration
=
cpu_timestamp
-
prev_cpu_timestamp
;
// Check distance between last report and this one is good enough
if
(
close_enough
(
duration
,
sampling_period_ns
))
{
prev_cpu_timestamp
=
cpu_timestamp
;
// Add the new record to the list
record
.
resize
(
header
->
size
);
// Possibly 264?
memcpy
(
record
.
data
(),
iter
,
header
->
size
);
records
.
emplace_back
(
record
);
}
}
// Go to the next record
...
...
@@ -438,6 +486,9 @@ bool IntelDriver::dump_perfcnt()
}
read_data_from_metric_set
();
get_new_correlation
();
auto
new_records
=
parse_perf_records
(
metric_buffer
,
total_bytes_read
);
if
(
new_records
.
empty
())
{
// No new records from the GPU yet
...
...
@@ -455,26 +506,9 @@ bool IntelDriver::dump_perfcnt()
return
false
;
}
// Rotate correlations left by one position so as to make space at the end
std
::
rotate
(
correlations
.
begin
(),
correlations
.
begin
()
+
1
,
correlations
.
end
());
// Then we overwrite the last correlation with a new one
if
(
auto
corr
=
query_correlation_timestamps
())
{
correlations
.
back
()
=
*
corr
;
}
else
{
PPS_LOG_FATAL
(
"Failed to get correlation timestamps"
);
return
false
;
}
return
true
;
}
/// @brief Leading fields of the report that follows a perf record header.
///
/// Records are reinterpreted as this struct to read the GPU timestamp, so the
/// order and width of the three fields must not change.
struct Report {
    /// NOTE(review): exact meaning not visible here — presumably a format/version word; confirm.
    uint32_t version;
    /// GPU timestamp of the report, read by the code that consumes records.
    uint32_t timestamp;
    /// NOTE(review): exact meaning not visible here — presumably a report/context id; confirm.
    uint32_t id;
};
/// @brief Adds accumulation src to dst
static
void
add
(
const
intel_perf_accumulator
&
src
,
intel_perf_accumulator
&
dest
)
{
...
...
@@ -491,9 +525,9 @@ uint32_t IntelDriver::gpu_next()
return
0
;
}
// Get first and
last
// Get first and
second
auto
record_a
=
reinterpret_cast
<
const
drm_i915_perf_record_header
*>
(
records
[
0
].
data
());
auto
record_b
=
reinterpret_cast
<
const
drm_i915_perf_record_header
*>
(
records
[
records
.
size
()
-
1
].
data
());
auto
record_b
=
reinterpret_cast
<
const
drm_i915_perf_record_header
*>
(
records
[
1
].
data
());
intel_perf_accumulator
temp_accumulator
;
intel_perf_accumulate_reports
(
...
...
@@ -504,8 +538,8 @@ uint32_t IntelDriver::gpu_next()
auto
report_b
=
reinterpret_cast
<
const
Report
*>
(
record_b
+
1
);
auto
gpu_timestamp
=
report_b
->
timestamp
;
// Consume
the records (but keep the last one as reference for next computation)
records
.
erase
(
std
::
begin
(
records
),
std
::
end
(
records
)
-
1
);
// Consume
first record
records
.
erase
(
std
::
begin
(
records
),
std
::
begin
(
records
)
+
1
);
return
gpu_timestamp
;
}
...
...
@@ -515,13 +549,6 @@ uint64_t IntelDriver::cpu_next()
if
(
auto
gpu_timestamp
=
gpu_next
())
{
auto
cpu_timestamp
=
correlate_gpu_timestamp
(
gpu_timestamp
);
// Make sure this report spans the requested sampling period
auto
duration
=
cpu_timestamp
-
last_cpu_timestamp
;
if
(
duration
<
sampling_period_ns
)
{
// This sample duration is too short, so we accumulate the next one as well
return
cpu_next
();
}
last_cpu_timestamp
=
cpu_timestamp
;
return
cpu_timestamp
;
}
...
...
test/src/intel_test.cc
View file @
93a3243f
...
...
@@ -17,7 +17,7 @@
namespace
pps
::
gpu
{
std
::
optional
<
IntelDriver
>
init_concrete_driver
()
std
::
optional
<
IntelDriver
>
init_concrete_driver
(
uint64_t
sampling_period_ns
=
1000000
)
{
auto
drm_device_opt
=
DrmDevice
::
create
(
0
);
if
(
!
drm_device_opt
)
{
...
...
@@ -30,7 +30,6 @@ std::optional<IntelDriver> init_concrete_driver()
driver
.
init_perfcnt
();
driver
.
enable_all_counters
();
constexpr
uint64_t
sampling_period_ns
=
1000000
;
driver
.
enable_perfcnt
(
sampling_period_ns
);
return
driver
;
...
...
@@ -40,7 +39,8 @@ std::optional<IntelDriver> init_concrete_driver()
/// CPU timestamps and that the timing order is correct and preserved
TEST
(
Intel
,
GpuTimestamps
)
{
auto
driver
=
init_concrete_driver
();
constexpr
uint64_t
sampling_period_ns
=
1000000
;
auto
driver
=
init_concrete_driver
(
sampling_period_ns
);
if
(
!
driver
)
{
GTEST_SKIP_
(
"Failed to initialize concrete driver"
);
}
...
...
@@ -68,6 +68,34 @@ TEST(Intel, GpuTimestamps)
}
}
/// @brief Dumps the counters repeatedly and logs the distance between
/// consecutive CPU timestamps, expecting at least one sample per dump
TEST(Intel, TimestampIntervals)
{
    constexpr uint64_t sampling_period_ns = 1000000;

    auto driver = init_concrete_driver(sampling_period_ns);
    if (!driver) {
        GTEST_SKIP_("Failed to initialize concrete driver");
    }

    const uint32_t max_iterations = 32;
    uint64_t previous_ts = 0;

    for (uint32_t round = 0; round < max_iterations; ++round) {
        // Keep dumping until the driver reports that records are available
        while (!driver->dump_perfcnt()) {
        }

        uint32_t samples_seen = 0;

        // cpu_next() yields 0 once there is nothing left to consume
        for (auto timestamp = driver->cpu_next(); timestamp; timestamp = driver->cpu_next()) {
            // Intervals are only logged from the second round onwards
            if (round > 0) {
                PPS_LOG("%lu", timestamp - previous_ts);
            }
            previous_ts = timestamp;
            samples_seen++;
        }

        EXPECT_GT(samples_seen, 0);
    }
}
/// @brief Make sure correlation timestamps are valid and that CPU timestamps
/// calculated by the driver fall between first and last correlation
TEST
(
Intel
,
Correlations
)
...
...
@@ -94,34 +122,6 @@ TEST(Intel, Correlations)
driver
->
disable_perfcnt
();
}
/// @brief Reads the whole content of a binary file into memory.
/// @param path Path of the file to read
/// @return The bytes of the file. An empty vector is returned when the file
/// cannot be opened or its size cannot be determined; in those cases a gtest
/// expectation failure is recorded as well.
std::vector<uint8_t> read_data_from_file(const char *path)
{
    auto file = std::fopen(path, "rb");
    EXPECT_TRUE(file != nullptr);
    if (file == nullptr) {
        // EXPECT_TRUE is non-fatal: bail out here instead of handing a null
        // stream to fseek/ftell/fread
        return {};
    }

    std::vector<uint8_t> data = {};

    std::fseek(file, 0, SEEK_END);
    // ftell reports errors with -1, so keep the signed value until checked
    const long end_offset = std::ftell(file);
    EXPECT_GT(end_offset, 0);
    std::fseek(file, 0, SEEK_SET);

    if (end_offset > 0) {
        const auto file_size = static_cast<size_t>(end_offset);
        data.resize(file_size);

        size_t bytes_read = std::fread(data.data(), sizeof(data[0]), file_size, file);
        EXPECT_EQ(bytes_read, file_size);
    }

    // Release the stream on every path that managed to open it
    std::fclose(file);

    return data;
}
/// @brief A default-constructed Intel driver should extract at least one
/// perf record from a previously captured dump file
TEST(Intel, PerfCnt)
{
    // No DRM device is attached to this driver: only the parser is exercised
    auto driver = IntelDriver();

    const char *dump_path = "test/data/gpu/intel/intel.dump";
    auto dump = read_data_from_file(dump_path);

    auto parsed = driver.parse_perf_records(dump, dump.size());
    EXPECT_GT(parsed.size(), 0);
}
}
// namespace pps::gpu
int
main
(
int
argc
,
char
**
argv
)
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment