Commit 98d1b3dc authored by Evangelos Rigas, committed by Albert Astals Cid

[core] Add support for PDF subtype property

Parse /GTS_PDF(A1,E,UA,VT,X)Version from the PDF Information
Dictionary into three enums: PDFSubtype, PDFSubtypePart, and
PDFSubtypeConformance.
parent 4f039c57
......@@ -40,6 +40,7 @@
// Copyright (C) 2018 Ben Timby <btimby@gmail.com>
// Copyright (C) 2018 Evangelos Foutras <evangelos@foutrelis.com>
// Copyright (C) 2018 Klarälvdalens Datakonsult AB, a KDAB Group company, <info@kdab.com>. Work sponsored by the LiMux project of the city of Munich
// Copyright (C) 2018 Evangelos Rigas <erigas@rnd2.org>
//
// To see a description of the changes please see the Changelog file that
// came with your tarball or type make ChangeLog if you are building from git
......@@ -62,6 +63,7 @@
#include <stddef.h>
#include <string.h>
#include <time.h>
#include <regex>
#include <sys/stat.h>
#include "goo/glibc.h"
#include "goo/gstrtod.h"
......@@ -318,6 +320,9 @@ GBool PDFDoc::setup(GooString *ownerPassword, GooString *userPassword) {
}
}
// Extract PDF Subtype information
extractPDFSubtype();
// done
return gTrue;
}
......@@ -482,6 +487,133 @@ GBool PDFDoc::checkEncryption(GooString *ownerPassword, GooString *userPassword)
return ret;
}
// Map the value of a GTS_PDF*Version Info entry (for example "PDF/A-1b" or
// "PDF/X-1a:2003") to a PDFSubtypePart.
//
// subtype            subtype already chosen from the name of the Info key
// pdfSubtypeVersion  value of that key; dereferenced unconditionally, so it
//                    must not be null
//
// Returns subtypePartNone when the string does not match the expected
// pattern or when the digit it carries does not name a known part.
static PDFSubtypePart pdfPartFromString(PDFSubtype subtype, GooString *pdfSubtypeVersion) {
  // Capture 1 is the single digit following the dash, capture 2 the optional
  // four digit year that may follow an optional conformance suffix and colon.
  const std::regex regex("PDF/(?:A|X|VT|E|UA)-([[:digit:]])(?:[[:alpha:]]{1,2})?:?([[:digit:]]{4})?");
  std::smatch match;
  const std::string pdfsubver = pdfSubtypeVersion->toStr();

  if (!std::regex_search(pdfsubver, match, regex)) {
    return subtypePartNone;
  }

  const int part = std::stoi(match.str(1));
  const int date = match[2].matched ? std::stoi(match.str(2)) : 0;

  if (subtype == subtypePDFX) {
    // For PDF/X the digit in the name is not the part itself: the part is
    // selected from the digit together with the year. A missing or
    // unrecognised year falls back to the earlier edition.
    switch (part) {
      case 1:
        return (date == 2003) ? subtypePart4 : subtypePart1;
      case 2:
        return subtypePart5;
      case 3:
        return (date == 2003) ? subtypePart6 : subtypePart3;
      case 4:
        return subtypePart7;
      case 5:
        return subtypePart8;
      default:
        return subtypePartNone;
    }
  }

  // For every other subtype the digit is used directly as the part. Only
  // 1..8 name real parts; anything else (notably 0, which would otherwise be
  // cast to the subtypePartNull sentinel) is reported as subtypePartNone.
  if (part < subtypePart1 || part > subtypePart8) {
    return subtypePartNone;
  }
  return static_cast<PDFSubtypePart>(part);
}
// Extract the conformance level from the value of a GTS_PDF*Version Info
// entry, i.e. the letters that directly follow the digit after the dash
// (for example the "b" in "PDF/A-1b").
//
// pdfSubtypeVersion  value of the Info entry; dereferenced unconditionally
//
// Returns subtypeConfNone when the string has no such letters or when they
// are not one of A, B, G, N, P, PG or U (compared case-insensitively).
static PDFSubtypeConformance pdfConformanceFromString(GooString *pdfSubtypeVersion) {
  const std::regex pattern("PDF/(?:A|X|VT|E|UA)-[[:digit:]]([[:alpha:]]+)");
  const std::string versionText = pdfSubtypeVersion->toStr();
  std::smatch found;

  if (!std::regex_search(versionText, found, pattern)) {
    return subtypeConfNone;
  }

  // The level may be written in either case, so normalise before comparing.
  GooString level(found.str(1));
  level.lowerCase();

  static const struct {
    const char *tag;
    PDFSubtypeConformance value;
  } knownLevels[] = {
    { "a", subtypeConfA },
    { "b", subtypeConfB },
    { "g", subtypeConfG },
    { "n", subtypeConfN },
    { "p", subtypeConfP },
    { "pg", subtypeConfPG },
    { "u", subtypeConfU },
  };

  for (const auto &entry : knownLevels) {
    if (level.cmp(entry.tag) == 0) {
      return entry.value;
    }
  }
  return subtypeConfNone;
}
void PDFDoc::extractPDFSubtype() {
pdfSubtype = subtypeNull;
pdfPart = subtypePartNull;
pdfConformance = subtypeConfNull;
GooString *pdfSubtypeVersion = nullptr;
// Find PDF InfoDict subtype key if any
if ((pdfSubtypeVersion = getDocInfoStringEntry("GTS_PDFA1Version"))) {
pdfSubtype = subtypePDFA;
} else if ((pdfSubtypeVersion = getDocInfoStringEntry("GTS_PDFEVersion"))) {
pdfSubtype = subtypePDFE;
} else if ((pdfSubtypeVersion = getDocInfoStringEntry("GTS_PDFUAVersion"))) {
pdfSubtype = subtypePDFUA;
} else if ((pdfSubtypeVersion = getDocInfoStringEntry("GTS_PDFVTVersion"))) {
pdfSubtype = subtypePDFVT;
} else if ((pdfSubtypeVersion = getDocInfoStringEntry("GTS_PDFXVersion"))) {
pdfSubtype = subtypePDFX;
} else {
pdfSubtype = subtypeNone;
pdfPart = subtypePartNone;
pdfConformance = subtypeConfNone;
return;
}
// Extract part from version string
pdfPart = pdfPartFromString(pdfSubtype, pdfSubtypeVersion);
// Extract conformance from version string
pdfConformance = pdfConformanceFromString(pdfSubtypeVersion);
delete pdfSubtypeVersion;
}
std::vector<FormWidgetSignature*> PDFDoc::getSignatureWidgets()
{
int num_pages = getNumPages();
......
......@@ -31,6 +31,7 @@
// Copyright (C) 2015 André Esser <bepandre@hotmail.com>
// Copyright (C) 2016 Jakub Alba <jakubalba@gmail.com>
// Copyright (C) 2018 Klarälvdalens Datakonsult AB, a KDAB Group company, <info@kdab.com>. Work sponsored by the LiMux project of the city of Munich
// Copyright (C) 2018 Evangelos Rigas <erigas@rnd2.org>
//
// To see a description of the changes please see the Changelog file that
// came with your tarball or type make ChangeLog if you are building from git
......@@ -74,6 +75,41 @@ enum PDFWriteMode {
writeForceIncremental
};
// PDF subtype, selected by which GTS_PDF*Version key the Info dictionary
// carries.
enum PDFSubtype {
  subtypeNull  = 0, // value assigned before the Info dictionary is inspected
  subtypePDFA  = 1, // GTS_PDFA1Version present
  subtypePDFE  = 2, // GTS_PDFEVersion present
  subtypePDFUA = 3, // GTS_PDFUAVersion present
  subtypePDFVT = 4, // GTS_PDFVTVersion present
  subtypePDFX  = 5, // GTS_PDFXVersion present
  subtypeNone  = 6  // none of the keys above is present
};
// Part of the standard a document claims to follow.
//
// The values of subtypePart1..subtypePart8 must stay equal to the part
// number: pdfPartFromString converts the parsed digit to this enum with a
// plain integer cast.
enum PDFSubtypePart {
  subtypePartNull = 0, // value assigned before the Info dictionary is inspected
  subtypePart1    = 1,
  subtypePart2    = 2,
  subtypePart3    = 3,
  subtypePart4    = 4,
  subtypePart5    = 5,
  subtypePart6    = 6,
  subtypePart7    = 7,
  subtypePart8    = 8,
  subtypePartNone = 9  // no part could be determined
};
// Conformance level named by the letters that follow the part digit in a
// GTS_PDF*Version string.
enum PDFSubtypeConformance {
  subtypeConfNull = 0, // value assigned before the Info dictionary is inspected
  subtypeConfA    = 1, // "a"
  subtypeConfB    = 2, // "b"
  subtypeConfG    = 3, // "g"
  subtypeConfN    = 4, // "n"
  subtypeConfP    = 5, // "p"
  subtypeConfPG   = 6, // "pg"
  subtypeConfU    = 7, // "u"
  subtypeConfNone = 8  // no recognised conformance letters
};
//------------------------------------------------------------------------
// PDFDoc
//------------------------------------------------------------------------
......@@ -273,6 +309,11 @@ public:
// Return the "CreationDate" entry of the Info dictionary, as produced by
// getDocInfoStringEntry.
// NOTE(review): extractPDFSubtype deletes the string it obtains from
// getDocInfoStringEntry, so the caller presumably owns the result - confirm.
GooString *getDocInfoCreatDate() { return getDocInfoStringEntry("CreationDate"); }
// Return the "ModDate" entry of the Info dictionary, as produced by
// getDocInfoStringEntry (same ownership caveat as above).
GooString *getDocInfoModDate() { return getDocInfoStringEntry("ModDate"); }
// Return the PDF subtype, part, and conformance.
// These accessors only read the pdfSubtype, pdfPart and pdfConformance
// members, which extractPDFSubtype() assigns; they do no parsing themselves.
PDFSubtype getPDFSubtype() const { return pdfSubtype; }
PDFSubtypePart getPDFSubtypePart() const { return pdfPart; }
PDFSubtypeConformance getPDFSubtypeConformance() const { return pdfConformance; }
// Return the PDF version specified by the file, split into its major and
// minor components (the pdfMajorVersion and pdfMinorVersion members).
int getPDFMajorVersion() { return pdfMajorVersion; }
int getPDFMinorVersion() { return pdfMinorVersion; }
......@@ -346,6 +387,8 @@ private:
GBool checkFooter();
void checkHeader();
GBool checkEncryption(GooString *ownerPassword, GooString *userPassword);
// Set pdfSubtype, pdfPart and pdfConformance from the GTS_PDF*Version
// entry of the Info dictionary, or to their "None" values if there is none.
void extractPDFSubtype();
// Get the offset of the start xref table.
Goffset getStartXRef(GBool tryingToReconstruct = gFalse);
// Get the offset of the entries in the main XRef table of a
......@@ -365,6 +408,9 @@ private:
void *guiData;
int pdfMajorVersion;
int pdfMinorVersion;
// PDF subtype information; all three are assigned by extractPDFSubtype().
PDFSubtype pdfSubtype;                // which GTS_PDF*Version key was found
PDFSubtypePart pdfPart;               // part parsed from that key's value
PDFSubtypeConformance pdfConformance; // conformance parsed from that key's value
Linearization *linearization;
// linearizationState = 0: unchecked
// linearizationState = 1: checked and valid
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment