Commit fc27d3d8 authored by Adam Reichold

Add simple Python script to measure and compare rendering performance of Poppler builds.

parent ebc05abc
Pipeline #7093 passed with stage
in 9 minutes and 37 seconds
......@@ -661,6 +661,7 @@ if(ENABLE_GLIB)
add_subdirectory(glib)
endif()
add_subdirectory(test)
add_subdirectory(perftest)
if(ENABLE_QT5)
add_subdirectory(qt5)
endif()
......
# Build the performance-test driver executable from driver.cc and link it
# against the poppler library; the target is only defined when ENABLE_SPLASH
# is set.
if (ENABLE_SPLASH)
add_executable(driver driver.cc)
target_link_libraries(driver poppler)
endif ()
import pickle
import zlib
from util import reference, reldev, maxabs
def collect_stats3a(stats, entry):
    """Accumulate one run-time measurement into the running totals.

    Args:
        stats: ``None`` on the first call, otherwise the ``(total, acc)``
            tuple returned by a previous call.
        entry: A ``(mean, stdev)`` pair describing a single measurement.

    Returns:
        A new ``(total, acc)`` tuple where ``total`` is the sum of all means
        and ``acc`` is the sum of all relative deviations ``|stdev / mean|``.
    """
    total, acc = (0, 0) if stats is None else stats
    mean, stdev = entry
    # A zero mean makes the relative deviation undefined; count it as zero so
    # that one degenerate sample cannot abort the whole comparison.
    relative_deviation = abs(stdev / mean) if mean else 0.0
    return (total + mean, acc + relative_deviation)
def collect_stats3b(stats, entry):
    """Add one memory-usage value to the running total.

    Args:
        stats: ``None`` on the first call, otherwise the total so far.
        entry: The memory usage of a single measurement.

    Returns:
        The updated total.
    """
    previous_total = 0 if stats is None else stats
    return previous_total + entry
def collect_stats2(stats, entry):
    """Fold one result entry into the per-metric running totals.

    Args:
        stats: ``None`` on the first call, otherwise the dictionary returned
            by a previous call (it is updated in place).
        entry: A mapping with the keys ``'run_time'`` and ``'memory_usage'``.

    Returns:
        The dictionary holding the accumulated ``'run_time'`` and
        ``'memory_usage'`` statistics.
    """
    if stats is None:
        stats = dict.fromkeys(('run_time', 'memory_usage'))
    collectors = (
        ('run_time', collect_stats3a),
        ('memory_usage', collect_stats3b),
    )
    for key, collect in collectors:
        stats[key] = collect(stats[key], entry[key])
    return stats
def collect_stats1(stats, entry, ref_entry):
    """Fold a result entry and its reference entry into the running totals.

    Args:
        stats: ``None`` on the first call, otherwise the dictionary returned
            by a previous call (it is updated in place).
        entry: The measurement taken from the results under test.
        ref_entry: The matching measurement from the reference results.

    Returns:
        The dictionary with the accumulated ``'results'`` and
        ``'ref_results'`` statistics.
    """
    if stats is None:
        stats = dict.fromkeys(('results', 'ref_results'))
    for key, measurement in (('results', entry), ('ref_results', ref_entry)):
        stats[key] = collect_stats2(stats[key], measurement)
    return stats
def print_stats(count, stats):
    """Print the cumulative run time and memory usage of a set of results.

    Args:
        count: Number of measurements accumulated in ``stats``; must be
            non-zero because the accumulated accuracies are averaged over it.
        stats: The dictionary built by ``collect_stats1``, i.e. the keys
            ``'results'`` and ``'ref_results'``, each holding a
            ``'run_time'`` tuple ``(sum, accumulated accuracy)`` and a
            ``'memory_usage'`` sum.
    """
    results, ref_results = stats['results'], stats['ref_results']

    run_time_sum, ref_run_time_sum = results['run_time'][0], ref_results['run_time'][0]
    run_time_acc, ref_run_time_acc = results['run_time'][1] / count, ref_results['run_time'][1] / count
    run_time_reldev = reldev(run_time_sum, ref_run_time_sum)

    memory_usage_sum, ref_memory_usage_sum = results['memory_usage'], ref_results['memory_usage']
    memory_usage_reldev = reldev(memory_usage_sum, ref_memory_usage_sum)

    print('\tCumulative run time:')
    print('\t\tResult: %.2f min ∓ %.1f %%' % (run_time_sum * 1.0e-6 / 60.0, run_time_acc * 100.0))
    print('\t\tReference: %.2f min ∓ %.1f %%' % (ref_run_time_sum * 1.0e-6 / 60.0, ref_run_time_acc * 100.0))
    # reldev() yields a fraction; scale it to match the percent sign, exactly
    # as the per-page report does.
    print('\t\tDeviation: %+.2f %%' % (run_time_reldev * 100.0))
    print('\tCumulative memory usage:')
    # NOTE(review): the divisor assumes the stored memory usage is in bytes;
    # confirm against the unit of the value recorded by the measurement step.
    print('\t\tResult: %.1f MB' % (memory_usage_sum / 1024.0 / 1024.0))
    print('\t\tReference: %.1f MB' % (ref_memory_usage_sum / 1024.0 / 1024.0))
    print('\t\tDeviation: %+.2f %%' % (memory_usage_reldev * 100.0))
def _load_results(path):
    """Read a zlib-compressed pickle of measurement results from ``path``."""
    with open(path, 'rb') as file:
        data = file.read()
    # SECURITY: unpickling can execute arbitrary code, so only result files
    # from a trusted source must be passed to this tool.
    return pickle.loads(zlib.decompress(data))


def _page_order(page):
    """Sort key that places the whole-document marker ``None`` before page numbers."""
    return (page is not None, 0 if page is None else page)


def compare(args):
    """Compare measured results against reference results and print a report.

    Every document/page present in both files is compared. Entries whose
    relative run-time or memory deviation exceeds ``args.threshold`` are
    listed individually, followed by cumulative statistics for all matching
    entries and for those above the threshold.

    Args:
        args: Parsed command-line arguments providing ``results`` and
            ``reference_results`` (paths of the result files) and
            ``threshold`` (a fraction, e.g. ``0.05`` for 5 %).
    """
    results = _load_results(args.results)
    ref_results = _load_results(args.reference_results)

    count = 0
    stats = None
    count_above_threshold = 0
    stats_above_threshold = None

    # Sorting makes the report order reproducible between runs; the order of
    # a plain set intersection is not.
    for document in sorted(results.keys() & ref_results.keys()):
        entries = results[document]
        ref_entries = ref_results[document]

        for page in sorted(entries.keys() & ref_entries.keys(), key=_page_order):
            entry = entries[page]
            ref_entry = ref_entries[page]

            count += 1
            stats = collect_stats1(stats, entry, ref_entry)

            run_time_mean, run_time_stdev = entry['run_time']
            ref_run_time_mean, ref_run_time_stdev = ref_entry['run_time']
            run_time_reldev = reldev(run_time_mean, ref_run_time_mean)

            memory_usage = entry['memory_usage']
            ref_memory_usage = ref_entry['memory_usage']
            memory_usage_reldev = reldev(memory_usage, ref_memory_usage)

            if maxabs(run_time_reldev, memory_usage_reldev) <= args.threshold:
                continue

            count_above_threshold += 1
            stats_above_threshold = collect_stats1(stats_above_threshold, entry, ref_entry)

            print('%s:' % (reference(document, page)))
            print('\tRun time:')
            print('\t\tResult: %.2f ∓ %.3f s' % (run_time_mean * 1.0e-6, run_time_stdev * 1.0e-6))
            print('\t\tReference: %.2f ∓ %.3f s' % (ref_run_time_mean * 1.0e-6, ref_run_time_stdev * 1.0e-6))
            print('\t\tDeviation: %.1f %%' % (run_time_reldev * 100.0))
            print('\tMemory usage:')
            print('\t\tResult: %.1f kB' % (memory_usage / 1024.0))
            print('\t\tReference: %.1f kB' % (ref_memory_usage / 1024.0))
            print('\t\tDeviation: %.1f %%' % (memory_usage_reldev * 100.0))

    print('%d matching result(s):' % (count))
    # Without any matching entry there are no statistics to print (the
    # accumulator is still None), so skip the summary instead of crashing.
    if count != 0:
        print_stats(count, stats)

    if count_above_threshold != 0:
        print('%d matching result(s) above the given threshold of %.1f %%:' % (count_above_threshold, args.threshold * 100.0))
        print_stats(count_above_threshold, stats_above_threshold)
#include <string>
#include <memory>
#include <functional>
#include <vector>
#include <iostream>
#include <algorithm>
#include <numeric>
#include <limits>
#include <cstdlib>
#include <cmath>
#include <time.h>
#include "PDFDoc.h"
#include "SplashOutputDev.h"
#include "splash/SplashBitmap.h"
#include "TextOutputDev.h"
#include "PSOutputDev.h"
namespace
{
std::unique_ptr<PDFDoc> openDocument(const char *filePath) {
std::unique_ptr<PDFDoc> document{
new PDFDoc(new GooString(filePath), nullptr, nullptr, nullptr)
};
if (!document->isOk()) {
document.reset();
}
return document;
}
// Creates a Splash output device in XBGR8 colour mode using the given paper
// colour and starts it on the given document.
std::unique_ptr<SplashOutputDev> openSplashDevice(PDFDoc *document, SplashColorPtr paperColor) {
  std::unique_ptr<SplashOutputDev> device{new SplashOutputDev(splashModeXBGR8, 4, false, paperColor)};

  if (!device) {
    return device;
  }

  device->startDoc(document);
  return device;
}
// Renders one page into the Splash device at the given resolution (used for
// both axes, no rotation) and discards the bitmap taken from the device.
void displayPageUsingSplash(PDFDoc *document, SplashOutputDev *device, int page, double resolution) {
  const double horizontalResolution = resolution;
  const double verticalResolution = resolution;
  const int rotation = 0;

  document->displayPage(device, page, horizontalResolution, verticalResolution, rotation, false, false, false);

  // The rendered pixels are not needed, only the time spent producing them.
  auto *bitmap = device->takeBitmap();
  delete bitmap;
}
// Renders every page of the document through a single Splash device.
// Returns false if the device could not be created.
bool renderAllPagesUsingSplash(PDFDoc *document, SplashColorPtr paperColor, double resolution) {
  const auto device = openSplashDevice(document, paperColor);

  if (device) {
    const int lastPage = document->getNumPages();
    int page = 1;

    while (page <= lastPage) {
      displayPageUsingSplash(document, device.get(), page, resolution);
      ++page;
    }

    return true;
  }

  return false;
}
// Renders a single page of the document through a fresh Splash device.
// Returns false if the device could not be created.
bool renderPageUsingSplash(PDFDoc *document, int page, SplashColorPtr paperColor, double resolution) {
  const auto device = openSplashDevice(document, paperColor);
  if (!device) {
    return false;
  }

  // displayPageUsingSplash() already takes the rendered bitmap from the
  // device and deletes it, so no second takeBitmap()/delete is needed here;
  // this also keeps the timed work identical to the all-pages variant.
  displayPageUsingSplash(document, device.get(), page, resolution);

  return true;
}
// Creates a text output device that is not bound to any output file.
// Returns an empty pointer when the device reports that it is not OK.
std::unique_ptr<TextOutputDev> openTextDevice() {
  std::unique_ptr<TextOutputDev> device{new TextOutputDev(nullptr, false, 0.0, false, false)};

  if (device->isOk()) {
    return device;
  }

  return nullptr;
}
// Runs one page through the text device at 72 dpi without rotation, then
// builds the word list and discards it immediately.
void displayPageAsText(PDFDoc *document, TextOutputDev *device, int page) {
  const double resolution = 72.0;
  const int rotation = 0;

  document->displayPage(device, page, resolution, resolution, rotation, false, false, false);

  // Only the cost of producing the word list matters, not its contents.
  auto *words = device->makeWordList();
  delete words;
}
// Extracts the text of every page of the document through a single text device.
// Returns false if the device could not be created.
bool renderAllPagesAsText(PDFDoc *document) {
  const auto device = openTextDevice();

  if (device) {
    const int lastPage = document->getNumPages();
    int page = 1;

    while (page <= lastPage) {
      displayPageAsText(document, device.get(), page);
      ++page;
    }

    return true;
  }

  return false;
}
// Extracts the text of a single page of the document through a fresh text device.
// Returns false if the device could not be created.
bool renderPageAsText(PDFDoc *document, int page) {
  const auto device = openTextDevice();

  if (device) {
    displayPageAsText(document, device.get(), page);
    return true;
  }

  return false;
}
// Output callback handed to the PostScript device; it ignores all of its
// arguments so that the generated output is simply dropped.
void dummyOutputFunction(void*, const char*, int) {
}
// Creates a PostScript output device for the given pages of the document
// whose output is passed to dummyOutputFunction.
// Returns an empty pointer when the device reports that it is not OK.
std::unique_ptr<PSOutputDev> openPostScriptDevice(PDFDoc *document, const std::vector<int>& pages) {
  std::unique_ptr<PSOutputDev> device{new PSOutputDev(dummyOutputFunction, nullptr, nullptr, document, pages, psModePS)};

  if (device->isOk()) {
    return device;
  }

  return nullptr;
}
// Runs one page through the PostScript device at 72 dpi without rotation.
void displayPageAsPostScript(PDFDoc *document, PSOutputDev *device, int page) {
  const double resolution = 72.0;
  const int rotation = 0;

  document->displayPage(device, page, resolution, resolution, rotation, false, false, false);
}
// Converts every page of the document using a single PostScript device.
// Returns false if the device could not be created.
bool renderAllPagesAsPostScript(PDFDoc *document) {
  // The device is told up front which pages it will receive: 1, 2, ..., N.
  std::vector<int> pages(document->getNumPages());
  std::iota(pages.begin(), pages.end(), 1);

  const auto device = openPostScriptDevice(document, pages);

  if (device) {
    for (auto current = pages.begin(); current != pages.end(); ++current) {
      displayPageAsPostScript(document, device.get(), *current);
    }

    return true;
  }

  return false;
}
// Converts a single page of the document using a fresh PostScript device.
// Returns false if the device could not be created.
bool renderPageAsPostScript(PDFDoc *document, int page) {
  std::vector<int> pages(1, page);

  const auto device = openPostScriptDevice(document, pages);

  if (device) {
    displayPageAsPostScript(document, device.get(), page);
    return true;
  }

  return false;
}
// Computes the relative accuracy of a series of measurements, i.e. the
// absolute value of the sample standard deviation divided by the mean.
//
// Returns the largest finite double when the accuracy cannot be determined:
// with fewer than two values, or when the mean is zero (which would otherwise
// yield NaN or infinity). Callers comparing the result against a target
// accuracy therefore treat both cases as "not accurate enough yet".
double computeAccuracy(const std::vector<double>& values) {
  const auto unknownAccuracy = std::numeric_limits<double>::max();

  const auto count = values.size();
  if (count < 2) {
    return unknownAccuracy;
  }

  const auto sum = std::accumulate(values.begin(), values.end(), 0.0);
  const auto mean = sum / count;
  if (mean == 0.0) {
    return unknownAccuracy;
  }

  const auto squaredDeviations = std::accumulate(
    values.begin(), values.end(), 0.0,
    [mean](double accumulated, double value) {
      return accumulated + (value - mean) * (value - mean);
    }
  );

  // Sample standard deviation, hence the division by count - 1.
  const auto stdev = std::sqrt(squaredDeviations / (count - 1));

  return std::abs(stdev / mean);
}
// Writes the number of pages of the document at filePath to standard output.
// Returns false, without writing anything, if the document cannot be opened.
bool checkPageCount(const char* filePath) {
  const auto document = openDocument(filePath);

  if (document) {
    const auto pageCount = document->getNumPages();
    std::cout << pageCount;
    return true;
  }

  return false;
}
// Measures the CPU time consumed by repeated invocations of action.
//
// The action is first invoked warmUpIterations times without being timed
// (the results of those calls are ignored). It is then timed for up to
// maxIterations runs, stopping early once at least minIterations runs were
// made and computeAccuracy() of the collected run times is at or below
// targetAccuracy. The run times, in microseconds, are written to standard
// output as a bracketed, comma-separated list.
//
// Returns false, without writing anything, as soon as a timed invocation of
// the action fails.
bool measureAction(const std::function<bool()>& action, int warmUpIterations, int minIterations, int maxIterations, double targetAccuracy) {
  std::vector<double> runtimes;
  runtimes.reserve(maxIterations);

  for (int remaining = warmUpIterations; remaining > 0; --remaining) {
    action();
  }

  int iteration = 0;
  while (iteration < maxIterations) {
    ++iteration;

    struct timespec before;
    ::clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &before);

    const bool succeeded = action();
    if (!succeeded) {
      return false;
    }

    struct timespec after;
    ::clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &after);

    // Convert the elapsed seconds and nanoseconds into microseconds.
    const auto elapsedSeconds = after.tv_sec - before.tv_sec;
    const auto elapsedNanoseconds = after.tv_nsec - before.tv_nsec;
    runtimes.push_back(elapsedSeconds * 1.0e+6 + elapsedNanoseconds * 1.0e-3);

    const bool accurateEnough = iteration >= minIterations && computeAccuracy(runtimes) <= targetAccuracy;
    if (accurateEnough) {
      break;
    }
  }

  const char *separator = "";
  std::cout << "[";
  for (const double runtime : runtimes) {
    std::cout << separator << runtime;
    separator = ",";
  }
  std::cout << "]";

  return true;
}
}
// Entry point of the measurement driver.
//
// With a single argument (a file path) the page count of that document is
// written to standard output. With seven arguments
//
//   warmUpIterations minIterations maxIterations targetAccuracy mode filePath page
//
// the selected rendering mode ("splash", "text" or "postscript") is measured
// for the given page, or for all pages if page is less than one. Every other
// argument count, invalid arguments and all failures result in EXIT_FAILURE.
int main(int argc, char** argv) {
  std::unique_ptr<GlobalParams> ownedGlobalParams{new GlobalParams()};
  if (!ownedGlobalParams) {
    return EXIT_FAILURE;
  }
  ::globalParams = ownedGlobalParams.get();

  if (argc == 2) {
    return checkPageCount(argv[1]) ? EXIT_SUCCESS : EXIT_FAILURE;
  }
  if (argc != 8) {
    return EXIT_FAILURE;
  }

  const int warmUpIterations = std::atoi(argv[1]);
  const int minIterations = std::atoi(argv[2]);
  const int maxIterations = std::atoi(argv[3]);
  const double targetAccuracy = std::atof(argv[4]);
  const std::string mode{argv[5]};
  const char *const filePath = argv[6];
  const int page = std::atoi(argv[7]);

  const bool iterationsValid =
    warmUpIterations >= 1 && minIterations >= 1 && maxIterations >= 1 && minIterations <= maxIterations;
  if (!iterationsValid) {
    return EXIT_FAILURE;
  }

  const bool useSplash = mode == "splash";
  const bool useText = mode == "text";
  const bool usePostScript = mode == "postscript";
  if (!useSplash && !useText && !usePostScript) {
    return EXIT_FAILURE;
  }

  SplashColor paperColor;
  paperColor[0] = 0xFF;
  paperColor[1] = 0xFF;
  paperColor[2] = 0xFF;

  const auto resolution = 72.0;

  const auto document = openDocument(filePath);
  if (!document) {
    return EXIT_FAILURE;
  }

  const auto pageCount = document->getNumPages();
  if (pageCount < 1 || pageCount < page) {
    return EXIT_FAILURE;
  }

  // A page number below one selects the "all pages" variant of each mode.
  const bool allPages = page < 1;

  std::function<bool()> action;

  if (useSplash) {
    if (allPages) {
      action = [&document, &paperColor, resolution]() {
        return renderAllPagesUsingSplash(document.get(), paperColor, resolution);
      };
    } else {
      action = [&document, page, &paperColor, resolution]() {
        return renderPageUsingSplash(document.get(), page, paperColor, resolution);
      };
    }
  } else if (useText) {
    if (allPages) {
      action = [&document]() {
        return renderAllPagesAsText(document.get());
      };
    } else {
      action = [&document, page]() {
        return renderPageAsText(document.get(), page);
      };
    }
  } else if (usePostScript) {
    if (allPages) {
      action = [&document]() {
        return renderAllPagesAsPostScript(document.get());
      };
    } else {
      action = [&document, page]() {
        return renderPageAsPostScript(document.get(), page);
      };
    }
  }

  if (!action) {
    return EXIT_FAILURE;
  }

  const bool succeeded = measureAction(action, warmUpIterations, minIterations, maxIterations, targetAccuracy);
  return succeeded ? EXIT_SUCCESS : EXIT_FAILURE;
}
import json
import multiprocessing
import os
import pickle
import statistics
import subprocess
import sys
import time
import zlib
from util import rewrite, reference
def check_page_count(document):
    """Ask the driver for the number of pages of ``document``.

    Args:
        document: Path of the document passed to the driver executable.

    Returns:
        The page count printed by the driver, or ``0`` if the driver exits
        with a failure status or prints something that is not an integer.
    """
    global driver
    command = [driver, document]
    try:
        output = subprocess.check_output(command, stderr=subprocess.DEVNULL)
        page_count = int(output)
    except (subprocess.CalledProcessError, ValueError):
        page_count = 0
    return page_count
def measure_command(command):
    """Run ``command`` and collect its reported run times and memory usage.

    The command is expected to print a JSON list of run times to its
    standard output and to exit with status zero.

    Args:
        command: The argument list of the process to run.

    Returns:
        On success the tuple ``((mean, stdev), max_rss)`` where ``max_rss``
        is ``ru_maxrss`` of the child as reported by ``os.wait4``. On failure
        a string describing the problem: the joined command line if the
        process did not exit cleanly, otherwise the name and message of the
        exception that occurred. Nothing is raised so that the result can
        always be passed back from a worker process.
    """
    try:
        process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)
        # Drain the pipe before waiting: a child blocked on a full pipe would
        # never exit, and the pipe is closed once it has been read.
        with process.stdout:
            output = process.stdout.read()
        _, status, resources = os.wait4(process.pid, 0)
        if not os.WIFEXITED(status) or os.WEXITSTATUS(status) != 0:
            return ' '.join(command)
        run_times = json.loads(output)
        mean = statistics.mean(run_times)
        # The sample standard deviation is undefined for a single run time.
        stdev = statistics.stdev(run_times) if len(run_times) > 1 else 0.0
        return ((mean, stdev), resources.ru_maxrss)
    except Exception as error:
        return '%s: %s' % (type(error).__name__, error)
def measure_task(task):
    """Measure a single ``(document, page)`` task using the driver.

    The driver path, mode, iteration counts and target accuracy are read from
    the module-level globals.

    Args:
        task: A ``(document, page)`` tuple; a falsy ``page`` is passed to the
            driver as ``'0'``.

    Returns:
        The tuple ``(document, page, measurement)`` where ``measurement`` is
        whatever ``measure_command`` returned for the driver invocation.
    """
    global driver, mode, warm_up_iterations, min_iterations, max_iterations, target_accuracy
    document, page = task
    page_argument = str(page) if page else '0'
    iteration_arguments = [
        str(value)
        for value in (warm_up_iterations, min_iterations, max_iterations)
    ]
    command = [
        driver,
        *iteration_arguments,
        str(target_accuracy),
        mode,
        document,
        page_argument,
    ]
    measurement = measure_command(command)
    return (document, page, measurement)
def _configure_measurement(driver_path, render_mode, warm_up, minimum, maximum, accuracy):
    """Store the driver settings in the module globals read by the task functions."""
    global driver, mode, warm_up_iterations, min_iterations, max_iterations, target_accuracy
    driver = driver_path
    mode = render_mode
    warm_up_iterations = warm_up
    min_iterations = minimum
    max_iterations = maximum
    target_accuracy = accuracy


def measure(args):
    """Measure all PDF documents below a directory and store the results.

    The directory ``args.documents`` is scanned recursively for PDF files,
    every document (or every page of every document if ``args.pages`` is set)
    is measured by the driver in a pool of worker processes, and the results
    are written to ``args.results`` as a zlib-compressed pickle. The process
    exits with status 1 as soon as one measurement fails.

    Args:
        args: Parsed command-line arguments providing ``driver``, ``mode``,
            ``warm_up_iterations``, ``min_iterations``, ``max_iterations``,
            ``target_accuracy``, ``documents``, ``pages`` and ``results``.
    """
    settings = (
        args.driver, args.mode,
        args.warm_up_iterations, args.min_iterations, args.max_iterations,
        args.target_accuracy,
    )
    # The scan below runs in this process and needs the driver path as well.
    _configure_measurement(*settings)

    todo = 0
    written = rewrite(0, 'Scanning...')

    tasks = []
    for path, _, files in os.walk(args.documents):
        for file_name in files:
            # Match the extension case-insensitively so that e.g. "A.PDF" is
            # not silently skipped.
            if not file_name.lower().endswith('.pdf'):
                continue

            file_path = os.path.join(path, file_name)
            page_count = check_page_count(file_path)
            if page_count == 0:
                continue

            pages = range(1, page_count + 1) if args.pages else [None]
            for page in pages:
                tasks.append((file_path, page))
                todo += 1
                if todo % 100 == 0:
                    written = rewrite(written, 'Found %d...' % todo)

    done = 0
    written = rewrite(written, '%d/%d (%.1f%%): Measuring...' % (done, todo, 0))
    begin = time.time()

    # Keep one CPU free for this coordinating process.
    process_count = max(1, multiprocessing.cpu_count() - 1)

    # The settings are handed to every worker through the pool initializer:
    # globals assigned in the parent only reach the workers when processes are
    # forked, not with the "spawn" or "forkserver" start methods.
    with multiprocessing.Pool(process_count, initializer=_configure_measurement, initargs=settings) as pool:
        results = {}
        for result in pool.imap(measure_task, tasks):
            document, page, measurement = result
            try:
                run_time, memory_usage = measurement
            except (TypeError, ValueError):
                # A failed measurement is reported as a value that does not
                # unpack into the (run_time, memory_usage) pair.
                rewrite(written, 'Measurement failed: %s\n' % (measurement))
                sys.exit(1)

            entry = results.setdefault(document, {}).setdefault(page, {})
            entry['run_time'] = run_time
            entry['memory_usage'] = memory_usage

            done += 1
            # Limit progress output to roughly 500 updates in total.
            if done % max(1, todo // 500) == 0:
                written = rewrite(written, '%d/%d (%.1f%%): Measured %s...' % (done, todo, 100 * done / todo, reference(document, page)))

    end = time.time()
    rewrite(written, '%d/%d (%.1f%%): Measurement took %s.\n' % (done, todo, 100, time.strftime('%H:%M:%S', time.gmtime(end - begin))))

    with open(args.results, 'wb') as results_file:
        data = pickle.dumps(results)
        data = zlib.compress(data)
        results_file.write(data)
#!/usr/bin/env python3
import argparse
import sys
from measure import measure
from compare import compare
def main(args):
    """Parse the command line and run the selected sub-command.

    Args:
        args: The command-line arguments without the program name.

    Returns:
        ``0`` after the sub-command has run, or ``1`` after printing the help
        text when no sub-command was given.
    """
    parser = argparse.ArgumentParser()
    subparsers = parser.add_subparsers()

    measure_parser = subparsers.add_parser('measure')
    measure_parser.set_defaults(func=measure)
    measure_parser.add_argument('--driver', default='./driver')
    measure_parser.add_argument('--mode', choices=['splash', 'text', 'postscript'], default='splash')
    measure_parser.add_argument('--pages', action='store_true')
    measure_parser.add_argument('--warm_up_iterations', type=int, default=5)
    measure_parser.add_argument('--min_iterations', type=int, default=5)
    measure_parser.add_argument('--max_iterations', type=int, default=25)
    measure_parser.add_argument('--target_accuracy', type=float, default=0.01)
    measure_parser.add_argument('documents')
    measure_parser.add_argument('results')

    compare_parser = subparsers.add_parser('compare')
    compare_parser.set_defaults(func=compare)
    compare_parser.add_argument('--threshold', type=float, default=0.05)
    compare_parser.add_argument('results')
    compare_parser.add_argument('reference_results')

    args = parser.parse_args(args)

    # "func" is only set when a sub-command was selected. Checking for it
    # explicitly, instead of catching AttributeError around the call, keeps
    # genuine AttributeErrors raised inside a sub-command from being
    # swallowed and misreported as a usage error.
    func = getattr(args, 'func', None)
    if func is None:
        parser.print_help()
        return 1

    func(args)
    return 0
# Run the command-line interface only when executed as a script and use the
# return value of main() as the exit status of the process.
if __name__ == '__main__':
    sys.exit(main(sys.argv[1:]))
import math
import sys
def rewrite(written, message):
    """Replace the current status line on standard output with ``message``.

    Args:
        written: Number of characters written by the previous call; that many
            characters are blanked out before the new message is written.
        message: The text to write.

    Returns:
        The value returned by writing ``message`` to standard output, to be
        passed as ``written`` to the next call.
    """
    stream = sys.stdout
    # Return to the start of the line, blank out the old text and return again.
    erase = ''.join(('\r' * written, ' ' * written, '\r'))
    stream.write(erase)
    count = stream.write(message)
    stream.flush()
    return count
def reference(document, page):
    """Format a human-readable reference to a document or one of its pages.

    Args:
        document: The document name or path.
        page: The page number, or ``None`` to refer to the whole document.

    Returns:
        ``document`` itself if ``page`` is ``None``, otherwise the document
        followed by the page number in square brackets.
    """
    if page is None:
        return document
    return '%s[%d]' % (document, page)
def reldev(x, y):
    """Return the deviation of ``x`` from ``y`` relative to ``y``.

    The result is a fraction, e.g. ``0.5`` if ``x`` is 50 % larger than
    ``y``. ``y`` must not be zero.
    """
    difference = x - y
    return difference / y
def maxabs(x, y):
    """Return the larger of the absolute values of ``x`` and ``y``."""
    magnitude_x, magnitude_y = abs(x), abs(y)
    return max(magnitude_x, magnitude_y)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment