Commit a6dd3f95 authored by Adrian Johnson's avatar Adrian Johnson

Support unicode on windows console

The Win32Console class should be used in programs that require Unicode
support for command line arguments and stdio output on Windows. On
Windows it gets the command line arguments from GetCommandLineW,
converts them to UTF-8, and redefines the stdio output functions so that
UTF-8 output is converted into calls to WriteConsoleW. On other platforms
this class is a no-op.
parent 49107ffc
......@@ -81,6 +81,7 @@
#endif
#include "PDFDoc.h"
#include "Hints.h"
#include "UTF.h"
#ifdef MULTITHREADED
# define pdfdocLocker() MutexLocker locker(&mutex)
......@@ -152,7 +153,13 @@ PDFDoc::PDFDoc(GooString *fileNameA, GooString *ownerPassword,
#endif
// try to open file
file = GooFile::open(fileName);
#ifdef _WIN32
wchar_t *wFileName = (wchar_t*)utf8ToUtf16(fileName->getCString());
file = GooFile::open(wFileName);
gfree(wFileName);
#else
file = GooFile::open(fileName);
#endif
if (file == NULL) {
// fopen() has failed.
// Keep a copy of the errno returned by fopen so that it can be
......
This diff is collapsed.
......@@ -16,6 +16,9 @@
#pragma implementation
#endif
#include <cstdint>
#include <climits>
#include "goo/GooString.h"
#include "CharTypes.h"
......@@ -39,4 +42,40 @@ bool UnicodeIsValid(Unicode ucs4);
// is a unicode whitespace character
bool UnicodeIsWhitespace(Unicode ucs4);
// Count the number of UTF-16 code units required to convert a UTF-8 string
// (excluding the terminating null). Each invalid byte is counted as a
// code point since the UTF-8 conversion functions will replace it with
// REPLACEMENT_CHAR.
int utf8CountUtf16CodeUnits(const char *utf8);
// Convert UTF-8 to UTF-16 into a caller-supplied buffer.
// utf8     - UTF-8 string to convert. If not null terminated, set maxUtf8 to
//            the number of bytes to convert.
// utf16    - output buffer to write UTF-16 to. Output will always be null
//            terminated.
// maxUtf16 - maximum size of the output buffer, including space for the null.
//            The default (INT_MAX) places no effective limit on the output.
// maxUtf8  - maximum number of UTF-8 bytes to convert. Conversion stops when
//            either this count is reached or a null is encountered.
// Returns the number of UTF-16 code units written (excluding the null).
int utf8ToUtf16(const char *utf8, uint16_t *utf16, int maxUtf16 = INT_MAX, int maxUtf8 = INT_MAX);
// Allocate a UTF-16 string and convert utf8 into it.
// len is an optional output parameter (may be omitted / nullptr).
uint16_t *utf8ToUtf16(const char *utf8, int *len = nullptr);
// Count the number of UTF-8 bytes required to convert a UTF-16 string to
// UTF-8 (excluding the terminating null).
int utf16CountUtf8Bytes(const uint16_t *utf16);
// Convert UTF-16 to UTF-8 into a caller-supplied buffer.
// utf16    - UTF-16 string to convert. If not null terminated, set maxUtf16 to
//            the number of code units to convert.
// utf8     - output buffer to write UTF-8 to. Output will always be null
//            terminated.
// maxUtf8  - maximum size of the output buffer, including space for the null.
//            The default (INT_MAX) places no effective limit on the output.
// maxUtf16 - maximum number of UTF-16 code units to convert. Conversion stops
//            when either this count is reached or a null is encountered.
// Returns the number of UTF-8 bytes written (excluding the null).
int utf16ToUtf8(const uint16_t *utf16, char *utf8, int maxUtf8 = INT_MAX, int maxUtf16 = INT_MAX);
// Allocate a UTF-8 string and convert utf16 into it.
// len is an optional output parameter (may be omitted / nullptr).
char *utf16ToUtf8(const uint16_t *utf16, int *len = nullptr);
#endif
......@@ -69,6 +69,7 @@ qt5_add_qtest(check_qt5_actualtext check_actualtext.cpp)
qt5_add_qtest(check_qt5_lexer check_lexer.cpp)
qt5_add_qtest(check_qt5_pagelabelinfo check_pagelabelinfo.cpp)
qt5_add_qtest(check_qt5_goostring check_goostring.cpp)
qt5_add_qtest(check_qt5_utf_conversion check_utf_conversion.cpp)
if (NOT WIN32)
qt5_add_qtest(check_qt5_strings check_strings.cpp)
endif ()
#include <QtCore/QScopedPointer>
#include <QtTest/QtTest>
#include <cstring>
#include "UTF.h"
// Qt Test fixture exercising the UTF-8 <-> UTF-16 conversion helpers
// declared in UTF.h.
class TestUTFConversion : public QObject
{
Q_OBJECT
private slots:
// Registers the "s" column and one row per sample string.
void testUTF_data();
// Fetches "s" and checks the conversions in both directions.
void testUTF();
};
// Returns true when the two null-terminated byte strings are identical.
static bool compare(const char *a, const char *b)
{
    const int order = strcmp(a, b);
    return !order;
}
// Returns true when the two null-terminated UTF-16 strings hold the same
// sequence of code units (including having the same length).
static bool compare(const uint16_t *a, const uint16_t *b)
{
    for (;; ++a, ++b) {
        if (*a != *b) {
            // Either a differing code unit or one string ended early.
            return false;
        }
        if (*a == 0) {
            // Both strings reached their terminator together.
            return true;
        }
    }
}
// Data provider for testUTF(): a single QString column "s" with one row per
// sample text. Rows cover the empty string, plain ASCII, several non-Latin
// scripts, and a string outside the Basic Multilingual Plane.
void TestUTFConversion::testUTF_data()
{
QTest::addColumn<QString>("s");
QTest::newRow("<empty>") << QString::fromUtf8("");
QTest::newRow("a") << QString::fromUtf8("a");
QTest::newRow("abc") << QString::fromUtf8("abc");
QTest::newRow("Latin") << QString::fromUtf8("Vitrum edere possum; mihi non nocet");
QTest::newRow("Greek") << QString::fromUtf8("Μπορώ να φάω σπασμένα γυαλιά χωρίς να πάθω τίποτα");
QTest::newRow("Icelandic") << QString::fromUtf8("Ég get etið gler án þess að meiða mig");
QTest::newRow("Russian") << QString::fromUtf8("Я могу есть стекло, оно мне не вредит.");
QTest::newRow("Sanskrit") << QString::fromUtf8("काचं शक्नोम्यत्तुम् । नोपहिनस्ति माम् ॥");
QTest::newRow("Arabic") << QString::fromUtf8("أنا قادر على أكل الزجاج و هذا لا يؤلمني");
QTest::newRow("Chinese") << QString::fromUtf8("我能吞下玻璃而不伤身体。");
QTest::newRow("Thai") << QString::fromUtf8("ฉันกินกระจกได้ แต่มันไม่ทำให้ฉันเจ็บ");
// Characters outside the BMP need surrogate pairs in UTF-16.
QTest::newRow("non BMP") << QString::fromUtf8("𝓹𝓸𝓹𝓹𝓵𝓮𝓻");
}
void TestUTFConversion::testUTF()
{
char utf8Buf[100];
char *utf8String;
uint16_t utf16Buf[100];
uint16_t *utf16String;
int len;
QFETCH(QString, s);
char *str = strdup(s.toUtf8().constData());
// UTF-8 to UTF-16
// QString size() returns number of code units, not code points
QCOMPARE( utf8CountUtf16CodeUnits(str), s.size() );
len = utf8ToUtf16(str, utf16Buf);
QVERIFY( compare(utf16Buf, s.utf16()) );
QCOMPARE( len, s.size() );
utf16String = utf8ToUtf16(str);
QVERIFY( compare(utf16String, s.utf16()) );
free (utf16String);
// UTF-16 to UTF-8
QCOMPARE( utf16CountUtf8Bytes(s.utf16()), (int)strlen(str) );
len = utf16ToUtf8(s.utf16(), utf8Buf);
QVERIFY( compare(utf8Buf, str) );
QCOMPARE( len, (int)strlen(str) );
utf8String = utf16ToUtf8(s.utf16() );
QVERIFY( compare(utf8String, str) );
free (utf8String);
free(str);
}
QTEST_GUILESS_MAIN(TestUTFConversion)
#include "check_utf_conversion.moc"
set(common_srcs
parseargs.cc
Win32Console.cc
)
set(common_libs
poppler
......
......@@ -24,6 +24,7 @@
#include "Form.h"
#include "UnicodeMap.h"
#include "UTF.h"
#include "Win32Console.h"
JSInfo::JSInfo(PDFDoc *docA, int firstPage) {
doc = docA;
......
//========================================================================
//
// Win32Console.cc
//
// This file is licensed under the GPLv2 or later
//
// Copyright (C) 2017 Adrian Johnson <ajohnson@redneon.com>
//
// To see a description of the changes please see the Changelog file that
// came with your tarball or type make ChangeLog if you are building from git
//
//========================================================================
#ifdef _WIN32
#include "goo/gmem.h"
#include "UTF.h"
#define WIN32_CONSOLE_IMPL
#include "Win32Console.h"
#include <windows.h>
#include <shellapi.h>
// Capacity (in elements) of both the UTF-8 and the UTF-16 staging buffers.
static const int BUF_SIZE = 4096;
// Number of pending UTF-8 bytes currently held in buf.
static int bufLen = 0;
// Pending UTF-8 output that has not yet been written to the console.
static char buf[BUF_SIZE];
// Scratch buffer receiving the UTF-16 conversion of buf inside flush().
static wchar_t wbuf[BUF_SIZE];
// Whether stdout / stderr are attached to a console (as opposed to being
// redirected); determined in the Win32Console constructor.
static bool stdoutIsConsole = true;
static bool stderrIsConsole = true;
// Handle passed to WriteConsoleW; set in the Win32Console constructor.
static HANDLE consoleHandle = 0;
// If all = true, flush all characters to console.
// If all = false, flush up to and including last newline.
// Also flush all if buffer > half full to ensure space for future
// writes.
// Writes pending UTF-8 bytes from buf to the console as UTF-16.
//   all == true  : everything pending is written.
//   all == false : only complete lines (up to and including the last '\n')
//                  are written, unless the buffer is more than half full, in
//                  which case everything is written to keep room for later
//                  output.
// Bytes that are not written are moved to the start of buf.
static void flush(bool all = false)
{
    int pending = bufLen;
    if (!all && bufLen <= BUF_SIZE/2) {
        // Trim back to just after the last newline (0 if there is none).
        while (pending > 0 && buf[pending - 1] != '\n')
            --pending;
    }
    if (pending <= 0)
        return;

    DWORD wlen = utf8ToUtf16(buf, (uint16_t*)wbuf, BUF_SIZE, pending);
    WriteConsoleW(consoleHandle, wbuf, wlen, &wlen, nullptr);

    const int leftover = bufLen - pending;
    if (leftover > 0) {
        // Keep the unterminated partial line for the next call.
        memmove(buf, buf + pending, leftover);
        bufLen = leftover;
    } else {
        bufLen = 0;
    }
}
// True when stream is stdout or stderr and that stream was detected as being
// attached to the console; false for every other stream.
static inline bool streamIsConsole(FILE *stream)
{
    if (stream == stdout)
        return stdoutIsConsole;
    if (stream == stderr)
        return stderrIsConsole;
    return false;
}
// fprintf() replacement used by the stdio redefinition macros.
//
// The format string is taken from the first variadic argument (the macros
// expand to win32_fprintf(stream, __VA_ARGS__), so the format cannot be a
// named parameter).
//
// When stream is a console stream the formatted UTF-8 text is appended to the
// staging buffer and handed to flush(); otherwise the call is forwarded to
// vfprintf().
//
// Returns the value reported by vsnprintf()/vfprintf(): the number of
// characters produced, or a negative value on error.
int win32_fprintf(FILE *stream, ...)
{
    va_list args;
    int ret = 0;

    va_start(args, stream);
    const char *format = va_arg(args, const char *);
    if (streamIsConsole(stream)) {
        // Space left in buf, measured BEFORE bufLen is advanced so that the
        // truncation test below compares against the right limit.
        const int avail = BUF_SIZE - bufLen;
        ret = vsnprintf(buf + bufLen, avail, format, args);
        if (ret < 0) {
            // Formatting failed: drop whatever was written and leave the
            // pending data untouched rather than corrupting bufLen.
            buf[bufLen] = 0;
        } else if (ret >= avail) {
            // output was truncated
            buf[BUF_SIZE - 1] = 0;
            bufLen = BUF_SIZE - 1;
        } else {
            bufLen += ret;
        }
        flush();
    } else {
        // Propagate the real result instead of always reporting 0.
        ret = vfprintf(stream, format, args);
    }
    va_end(args);

    return ret;
}
// fwrite() replacement used by the stdio redefinition macros.
//
// For console streams the bytes are treated as UTF-8 text: they are staged in
// buf in buffer-sized chunks and passed to flush(). Other streams are
// forwarded to fwrite().
//
// Returns the number of items written: nmemb for console streams, or the
// fwrite() result otherwise. Returns 0 if size * nmemb overflows.
size_t win32_fwrite(const void *ptr, size_t size, size_t nmemb, FILE *stream)
{
    if (!streamIsConsole(stream))
        return fwrite(ptr, size, nmemb, stream);

    if (size == 0 || nmemb == 0)
        return 0;

    size_t remaining = size * nmemb;
    if (remaining / size != nmemb)
        return 0; // size * nmemb overflowed

    const char *src = static_cast<const char *>(ptr);
    while (remaining > 0) {
        // One byte is always reserved for the terminating null.
        const int room = BUF_SIZE - bufLen - 1;
        if (room <= 0) {
            // No space left; push everything out before continuing.
            flush(true);
            if (BUF_SIZE - bufLen - 1 <= 0)
                break; // cannot make progress
            continue;
        }

        size_t chunk = remaining < static_cast<size_t>(room) ? remaining
                                                              : static_cast<size_t>(room);
        if (chunk < remaining) {
            // Avoid cutting a multi-byte UTF-8 sequence in half: back up
            // until the byte that starts the next chunk is not a
            // continuation byte (10xxxxxx).
            size_t cut = chunk;
            while (cut > 0 && (static_cast<unsigned char>(src[cut]) & 0xC0) == 0x80)
                --cut;
            if (cut > 0)
                chunk = cut;
        }

        memcpy(buf + bufLen, src, chunk);
        bufLen += static_cast<int>(chunk);
        buf[bufLen] = 0;
        src += chunk;
        remaining -= chunk;
        flush();
    }

    return nmemb;
}
// Replaces the program's argc/argv with UTF-8 versions of the wide command
// line and detects whether stdout/stderr are attached to the console.
//
// argc, argv - pointers to main()'s argument count and vector; both are
//              overwritten when the wide command line can be obtained and
//              left untouched otherwise.
Win32Console::Win32Console(int *argc, char **argv[])
{
    LPWSTR *wargv;
    fpos_t pos;

    numArgs = 0;
    argList = nullptr;
    privateArgList = nullptr;
    wargv = CommandLineToArgvW(GetCommandLineW(), &numArgs);
    if (wargv) {
        // One extra slot so that argv[argc] is a null pointer, as callers of
        // a standard argv are entitled to expect.
        argList = new char*[numArgs + 1];
        privateArgList = new char*[numArgs];
        for (int i = 0; i < numArgs; i++) {
            argList[i] = utf16ToUtf8((uint16_t*)(wargv[i]));
            // parseArgs will rearrange the argv list so we keep our own copy
            // to use for freeing all the strings
            privateArgList[i] = argList[i];
        }
        argList[numArgs] = nullptr;
        LocalFree(wargv);
        *argc = numArgs;
        *argv = argList;
    }

    bufLen = 0;
    buf[0] = 0;
    wbuf[0] = 0;

    // check if stdout or stderr redirected
    // GetFileType() returns CHAR for console and special devices COMx, PRN, CON, NUL etc
    // fgetpos() succeeds on all CHAR devices except console and CON.
    stdoutIsConsole = (GetFileType(GetStdHandle(STD_OUTPUT_HANDLE)) == FILE_TYPE_CHAR)
                      && (fgetpos(stdout, &pos) != 0);
    stderrIsConsole = (GetFileType(GetStdHandle(STD_ERROR_HANDLE)) == FILE_TYPE_CHAR)
                      && (fgetpos(stderr, &pos) != 0);

    // Need a handle to the console. Doesn't matter if we use stdout or stderr as
    // long as the handle output is to the console.
    if (stdoutIsConsole)
        consoleHandle = GetStdHandle(STD_OUTPUT_HANDLE);
    else if (stderrIsConsole)
        consoleHandle = GetStdHandle(STD_ERROR_HANDLE);
}
// Writes out any pending console output and releases the UTF-8 argument
// strings and the two pointer arrays created by the constructor.
Win32Console::~Win32Console()
{
    flush(true);

    if (!argList)
        return;

    // Free through privateArgList: argList may have been reordered by the
    // argument parser.
    int i = 0;
    while (i < numArgs) {
        gfree(privateArgList[i]);
        ++i;
    }
    delete[] argList;
    delete[] privateArgList;
}
#endif // _WIN32
//========================================================================
//
// Win32Console.h
//
// This file is licensed under the GPLv2 or later
//
// Copyright (C) 2017 Adrian Johnson <ajohnson@redneon.com>
//
// To see a description of the changes please see the Changelog file that
// came with your tarball or type make ChangeLog if you are building from git
//
//========================================================================
// UTF-8 Support for win32 console
//
// Converts argc/argv to UTF-8. Supports UTF-8 stdout/stderr to win32 console.
// On other platforms this class is a no-op.
#ifdef _WIN32
// Ensure stdio.h is included before redefining stdio functions. We need to provide
// our own declarations for the redefined functions because win32 stdio.h functions
// have DLL export decorations.
#include <stdio.h>
#ifndef WIN32_CONSOLE_IMPL // don't redefine in Win32Console.cc so we can call original functions
// Route the stdio output functions through win32_fprintf()/win32_fwrite() so
// that UTF-8 text sent to the console is converted for WriteConsoleW.
#define printf(...) win32_fprintf(stdout, __VA_ARGS__)
#define fprintf(stream, ...) win32_fprintf(stream, __VA_ARGS__)
#define puts(s) win32_fprintf(stdout, "%s\n", s)
#define fputs(s, stream) win32_fprintf(stream, "%s", s)
// putc() takes the same (character, stream) arguments as fputc().
#define putc(c, stream) win32_fprintf(stream, "%c", c)
#define putchar(c) win32_fprintf(stdout, "%c", c)
#define fputc(c, stream) win32_fprintf(stream, "%c", c)
#define fwrite(ptr, size, nmemb, stream) win32_fwrite(ptr, size, nmemb, stream)
#endif
// Replacement output functions that the stdio macros in this header expand to.
extern "C" {
// The format string is passed as the first variadic argument.
int win32_fprintf(FILE *stream, ...);
// Same parameter list as fwrite().
size_t win32_fwrite(const void *ptr, size_t size, size_t nmemb, FILE *stream);
}
// Converts main()'s argc/argv to UTF-8 on construction and releases the
// converted strings on destruction. Intended to be created once, at the
// start of main().
class Win32Console
{
public:
    // argc, argv - pointers to main()'s argument count and vector.
    Win32Console(int *argc, char **argv[]);
    ~Win32Console();

    // The object owns heap-allocated argument arrays that the destructor
    // frees; copying would free them twice.
    Win32Console(const Win32Console &) = delete;
    Win32Console &operator=(const Win32Console &) = delete;

private:
    int numArgs;            // number of entries in the argument lists
    char **argList;         // array handed to the caller through *argv
    char **privateArgList;  // second array of the same string pointers, used for freeing
};
#else
// On other platforms this class is a no-op.
// Placeholder with the same constructor arguments as the Windows
// implementation; it leaves argc and argv untouched.
class Win32Console
{
public:
    Win32Console(int *argc, char ***argv)
    {
        // Nothing to do: the arguments are accepted only for source
        // compatibility.
        (void)argc;
        (void)argv;
    }
};
#endif // _WIN32
......@@ -40,6 +40,7 @@
#include "UnicodeMap.h"
#include "PDFDocEncoding.h"
#include "Error.h"
#include "Win32Console.h"
static GBool doList = gFalse;
static int saveNum = 0;
......@@ -99,6 +100,7 @@ int main(int argc, char *argv[]) {
Unicode u;
GBool isUnicode;
Win32Console win32Console(&argc, &argv);
exitCode = 99;
// parse args
......
......@@ -39,6 +39,7 @@
#include "PDFDoc.h"
#include "PDFDocFactory.h"
#include "FontInfo.h"
#include "Win32Console.h"
static const char *fontTypeNames[] = {
"unknown",
......@@ -94,6 +95,7 @@ int main(int argc, char *argv[]) {
GBool ok;
int exitCode;
Win32Console win32Console(&argc, &argv);
exitCode = 99;
// parse args
......
......@@ -47,6 +47,7 @@
#include "PDFDocFactory.h"
#include "ImageOutputDev.h"
#include "Error.h"
#include "Win32Console.h"
static int firstPage = 1;
static int lastPage = 0;
......@@ -120,6 +121,7 @@ int main(int argc, char *argv[]) {
GBool ok;
int exitCode;
Win32Console win32Console(&argc, &argv);
exitCode = 99;
// parse args
......
......@@ -61,6 +61,7 @@
#include "JSInfo.h"
#include "StructTreeRoot.h"
#include "StructElement.h"
#include "Win32Console.h"
static int firstPage = 1;
......@@ -616,6 +617,7 @@ int main(int argc, char *argv[]) {
exitCode = 99;
// parse args
Win32Console win32console(&argc, &argv);
ok = parseArgs(argDesc, &argc, argv);
if (!ok || (argc != 2 && !printEnc) || printVersion || printHelp) {
fprintf(stderr, "pdfinfo version %s\n", PACKAGE_VERSION);
......
......@@ -23,6 +23,7 @@
#include "PDFDoc.h"
#include "ErrorCodes.h"
#include "GlobalParams.h"
#include "Win32Console.h"
#include <ctype.h>
static int firstPage = 0;
......@@ -150,6 +151,7 @@ main (int argc, char *argv[])
exitCode = 99;
// parse args
Win32Console win32console(&argc, &argv);
ok = parseArgs (argDesc, &argc, argv);
if (!ok || argc != 3 || printVersion || printHelp)
{
......
......@@ -29,6 +29,7 @@
#include "Error.h"
#include "GlobalParams.h"
#include "SignatureInfo.h"
#include "Win32Console.h"
static const char * getReadableSigState(SignatureValidationStatus sig_vs)
{
......@@ -116,6 +117,7 @@ int main(int argc, char *argv[])
std::vector<FormWidgetSignature*> sig_widgets;
globalParams = new GlobalParams();
Win32Console win32Console(&argc, &argv);
int exitCode = 99;
GBool ok;
......
......@@ -18,6 +18,7 @@
#include "parseargs.h"
#include "pdftocairo-win32.h"
#include "Win32Console.h"
#include <dlgs.h>
#include <commctrl.h>
......
......@@ -55,6 +55,7 @@
#include "PDFDoc.h"
#include "PDFDocFactory.h"
#include "CairoOutputDev.h"
#include "Win32Console.h"
#ifdef USE_CMS
#ifdef USE_LCMS1
#include <lcms.h>
......@@ -943,6 +944,7 @@ int main(int argc, char *argv[]) {
int num_outputs;
// parse args
Win32Console win32Console(&argc, &argv);
if (!parseArgs(argDesc, &argc, argv)) {
printUsage("pdftocairo", 0, argDesc);
exit(99);
......
......@@ -63,6 +63,7 @@
#include "Error.h"
#include "DateInfo.h"
#include "goo/gfile.h"
#include "Win32Console.h"
static int firstPage = 1;
static int lastPage = 0;
......@@ -190,6 +191,7 @@ int main(int argc, char *argv[]) {
Object info;
int exit_status = EXIT_FAILURE;
Win32Console win32Console(&argc, &argv);
// parse args
ok = parseArgs(argDesc, &argc, argv);
if (!ok || argc < 2 || argc > 3 || printHelp || printVersion) {
......
......@@ -51,6 +51,7 @@
#include "splash/SplashBitmap.h"
#include "splash/Splash.h"
#include "SplashOutputDev.h"
#include "Win32Console.h"
// Uncomment to build pdftoppm with pthreads
// You may also have to change the buildsystem to
......@@ -394,6 +395,7 @@ int main(int argc, char *argv[]) {
int pg, pg_num_len;
double pg_w, pg_h, tmp;
Win32Console win32Console(&argc, &argv);
exitCode = 99;
// parse args
......
......@@ -51,6 +51,7 @@
#include "PDFDocFactory.h"
#include "PSOutputDev.h"
#include "Error.h"
#include "Win32Console.h"
static GBool setPSPaperSize(char *size, int &psPaperWidth, int &psPaperHeight) {
if (!strcmp(size, "match")) {
......@@ -218,6 +219,7 @@ int main(int argc, char *argv[]) {
GBool rasterAntialias = gFalse;
std::vector<int> pages;
Win32Console win32Console(&argc, &argv);
exitCode = 99;
// parse args
......
......@@ -59,6 +59,7 @@
#include <string>
#include <sstream>
#include <iomanip>
#include "Win32Console.h"
static void printInfoString(FILE *f, Dict *infoDict, const char *key,
const char *text1, const char *text2, UnicodeMap *uMap);
......@@ -180,6 +181,7 @@ int main(int argc, char *argv[]) {
char *p;
int exitCode;
Win32Console win32Console(&argc, &argv);
exitCode = 99;
// parse args
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment