Commit 192296d8 authored by Keith Packard

Adopt some RedHat suggestions for standard font configuration.

Add new helper program 'fc-case' to construct case folding tables from
    standard Unicode CaseFolding.txt file
Re-implement case insensitive functions with Unicode aware versions
    (including full case folding mappings)
parent 5cf8c536
2004-12-29 Keith Packard <keithp@keithp.com>
* fonts.conf.in:
Adopt some RedHat suggestions for standard font configuration.
* Makefile.am:
* configure.in:
* fc-case/CaseFolding.txt:
* fc-case/Makefile.am:
* fc-case/fc-case.c: (panic), (addFold), (ucs4_to_utf8),
(utf8_size), (addChar), (foldExtends), (case_fold_method_name),
(dump), (parseRaw), (caseFoldReadRaw), (main):
* fc-case/fccase.tmpl.h:
Add new helper program 'fc-case' to construct case folding
tables from standard Unicode CaseFolding.txt file
* src/fcint.h:
* src/fclist.c: (FcListValueHash):
* src/fcstr.c: (FcStrCaseWalkerInit), (FcStrCaseWalkerLong),
(FcStrCaseWalkerNext), (FcStrCaseWalkerNextIgnoreBlanks),
(FcStrCmpIgnoreCase), (FcStrCmpIgnoreBlanksAndCase),
(FcStrHashIgnoreCase), (FcStrIsAtIgnoreBlanksAndCase),
(FcStrIsAtIgnoreCase), (FcStrStrIgnoreCase):
Re-implement case insensitive functions with Unicode
aware versions (including full case folding mappings)
2004-12-13 Keith Packard <keithp@keithp.com>
reviewed by: Owen Taylor <otaylor@redhat.com>
......
......@@ -22,7 +22,7 @@
# PERFORMANCE OF THIS SOFTWARE.
DOCSRC=@DOCSRC@
SUBDIRS=fontconfig fc-lang fc-glyphname src fc-cache fc-list fc-match $(DOCSRC) test
SUBDIRS=fontconfig fc-lang fc-glyphname fc-case src fc-cache fc-list fc-match $(DOCSRC) test
EXTRA_DIST = \
fontconfig.pc.in \
......
......@@ -411,6 +411,7 @@ Makefile
fontconfig/Makefile
fc-lang/Makefile
fc-glyphname/Makefile
fc-case/Makefile
src/Makefile
src/fontconfig.def
fc-cache/Makefile
......
This diff is collapsed.
#
# $Id $
#
# Copyright © 2003 Keith Packard
#
# Permission to use, copy, modify, distribute, and sell this software and its
# documentation for any purpose is hereby granted without fee, provided that
# the above copyright notice appear in all copies and that both that
# copyright notice and this permission notice appear in supporting
# documentation, and that the name of Keith Packard not be used in
# advertising or publicity pertaining to distribution of the software without
# specific, written prior permission. Keith Packard makes no
# representations about the suitability of this software for any purpose. It
# is provided "as is" without express or implied warranty.
#
# KEITH PACKARD DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
# INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
# EVENT SHALL KEITH PACKARD BE LIABLE FOR ANY SPECIAL, INDIRECT OR
# CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
# DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
# TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
# PERFORMANCE OF THIS SOFTWARE.
#
# Build description for fc-case, the helper program that generates
# $(TARG) from the template $(TMPL) and the Unicode $(CASEFOLDING) file.
INCLUDES=-I${top_srcdir}/src -I${top_srcdir} $(FREETYPE_CFLAGS) $(WARN_CFLAGS)
# Template that is copied through fc-case with the tables substituted in
TMPL=fccase.tmpl.h
STMPL=${top_srcdir}/fc-case/${TMPL}
# Generated header
TARG=fccase.h
noinst_PROGRAMS=fc-case
noinst_HEADERS=$(TARG)
# NOTE(review): no fc-case.man appears in this change's file list --
# confirm the man page exists or drop this line.
noinst_MANS=fc-case.man
# NOTE(review): automake takes program sources from <prog>_SOURCES; this
# variable is spelled fc_case_SRCS, and fccaseint.h / fccaseread.c are not
# among the files added by this change -- confirm whether it is intended.
fc_case_SRCS= \
fc-case.c \
fccaseint.h \
fccaseread.c
CASEFOLDING=CaseFolding.txt
SCASEFOLDING=${top_srcdir}/fc-case/CaseFolding.txt
EXTRA_DIST=$(TMPL) $(CASEFOLDING)
# Regenerate the header: fc-case reads the case folding file named on its
# command line, copies the template from stdin to stdout and emits the
# generated tables in place of the template's @@@ marker line.
$(TARG): $(STMPL) fc-case $(SCASEFOLDING)
rm -f $(TARG)
./fc-case $(SCASEFOLDING) < $(STMPL) > $(TARG)
CLEANFILES=$(TARG)
/*
* $Id$
*
* Copyright © 2004 Keith Packard
*
* Permission to use, copy, modify, distribute, and sell this software and its
* documentation for any purpose is hereby granted without fee, provided that
* the above copyright notice appear in all copies and that both that
* copyright notice and this permission notice appear in supporting
* documentation, and that the name of Keith Packard not be used in
* advertising or publicity pertaining to distribution of the software without
* specific, written prior permission. Keith Packard makes no
* representations about the suitability of this software for any purpose. It
* is provided "as is" without express or implied warranty.
*
* KEITH PACKARD DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
* INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
* EVENT SHALL KEITH PACKARD BE LIABLE FOR ANY SPECIAL, INDIRECT OR
* CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
* DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
* TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
* PERFORMANCE OF THIS SOFTWARE.
*/
#include "fcint.h"
#include <ctype.h>
/* Capacity of the result list kept for a single folding entry */
#define MAX_OUT 32
/* Size of the buffers used to read text input one line at a time */
#define MAX_LINE 8192

/*
 * Folding class of an entry, as named by the one-letter code in the
 * second field of a case folding line.
 */
typedef enum _caseFoldClass {
    CaseFoldCommon,
    CaseFoldFull,
    CaseFoldSimple,
    CaseFoldTurkic
} CaseFoldClass;

/*
 * Associates a class letter with its CaseFoldClass value
 */
typedef struct _caseFoldClassMap {
    char		*name;
    CaseFoldClass	class;
} CaseFoldClassMap;

/*
 * Lookup table for the class letters; the entry with a null name
 * terminates the list.
 */
static CaseFoldClassMap caseFoldClassMap[] = {
    { "C", CaseFoldCommon },
    { "F", CaseFoldFull },
    { "S", CaseFoldSimple },
    { "T", CaseFoldTurkic },
    { 0, CaseFoldCommon }
};
/*
 * One parsed line of the case folding file
 */
typedef struct _caseFoldRaw {
    FcChar32		upper;		/* code point from the first field */
    CaseFoldClass	class;		/* class named by the second field */
    int			nout;		/* number of entries used in lower[] */
    FcChar32		lower[MAX_OUT];	/* code points the entry folds to */
} CaseFoldRaw;
/*
 * Print a fatal diagnostic on stderr and terminate the program with
 * exit status 1.  Control never returns to the caller.
 */
static void
panic (char *reason)
{
    fprintf (stderr, "fc-case: panic %s\n", reason);

    exit (1);
}
/* Largest growth in UTF-8 length caused by folding a single character */
int maxExpand;
/* Growable array of fold table entries and the number in use */
static FcCaseFold *folds;
int nfolds;

/*
 * Grow the fold table by one entry and return a pointer to the new,
 * uninitialized slot.  Panics when memory cannot be obtained.
 */
static FcCaseFold *
addFold (void)
{
    FcCaseFold	*grown;

    if (!folds)
	grown = malloc (sizeof (FcCaseFold));
    else
	grown = realloc (folds, (nfolds + 1) * sizeof (FcCaseFold));
    folds = grown;
    if (!grown)
	panic ("out of memory");
    nfolds++;
    return folds + (nfolds - 1);
}
/*
 * Encode a UCS-4 value as UTF-8 into dest.
 *
 * Returns the number of bytes written (between one and six), or 0
 * when the value is 0x80000000 or larger and cannot be encoded.
 */
static int
ucs4_to_utf8 (FcChar32 ucs4,
	      FcChar8 dest[FC_UTF8_MAX_LEN])
{
    FcChar8	*out = dest;
    int		shift;

    /*
     * Emit the lead byte; 'shift' is the bit position of the first
     * continuation byte, negative when there is none.
     */
    if (ucs4 < 0x80)
    {
	*out++ = ucs4;
	shift = -6;
    }
    else if (ucs4 < 0x800)
    {
	*out++ = ((ucs4 >> 6) & 0x1F) | 0xC0;
	shift = 0;
    }
    else if (ucs4 < 0x10000)
    {
	*out++ = ((ucs4 >> 12) & 0x0F) | 0xE0;
	shift = 6;
    }
    else if (ucs4 < 0x200000)
    {
	*out++ = ((ucs4 >> 18) & 0x07) | 0xF0;
	shift = 12;
    }
    else if (ucs4 < 0x4000000)
    {
	*out++ = ((ucs4 >> 24) & 0x03) | 0xF8;
	shift = 18;
    }
    else if (ucs4 < 0x80000000)
    {
	*out++ = ((ucs4 >> 30) & 0x01) | 0xFC;
	shift = 24;
    }
    else
	return 0;

    /* Emit the continuation bytes, six payload bits at a time */
    while (shift >= 0)
    {
	*out++ = ((ucs4 >> shift) & 0x3F) | 0x80;
	shift -= 6;
    }
    return out - dest;
}
/*
 * Number of bytes needed to encode ucs4 as UTF-8, found by encoding
 * it into a scratch buffer.  Returns 0 for values ucs4_to_utf8 rejects.
 */
static int
utf8_size (FcChar32 ucs4)
{
    FcChar8	scratch[FC_UTF8_MAX_LEN];
    int		nbytes;

    nbytes = ucs4_to_utf8 (ucs4, scratch);
    return nbytes;
}
/* Growable UTF-8 byte array holding the results of all full foldings */
static FcChar8 *foldChars;
/* Number of bytes used in foldChars */
int nfoldChars;
/* Longest single full folding result, in UTF-8 bytes */
int maxFoldChars;
/* upper value of the first and of the most recent entry read */
FcChar32 minFoldChar;
FcChar32 maxFoldChar;

/*
 * Append the UTF-8 encoding of c to foldChars.
 *
 * Panics when memory cannot be obtained, or when c cannot be encoded:
 * a zero-length encoding would otherwise be dropped silently (or, on
 * the first call, request a zero-byte allocation whose null result is
 * indistinguishable from an out-of-memory failure).
 */
static void
addChar (FcChar32 c)
{
    FcChar8	utf8[FC_UTF8_MAX_LEN];
    int		len;
    int		i;

    len = ucs4_to_utf8 (c, utf8);
    if (len <= 0)
	panic ("invalid code point in case folding data");
    if (foldChars)
	foldChars = realloc (foldChars, (nfoldChars + len) * sizeof (FcChar8));
    else
	foldChars = malloc (sizeof (FcChar8) * len);
    if (!foldChars)
	panic ("out of memory");
    for (i = 0; i < len; i++)
	foldChars[nfoldChars + i] = utf8[i];
    nfoldChars += len;
}
/*
 * Decide whether the entry in 'raw' continues the run described by
 * 'fold', so that it can be merged instead of starting a new entry.
 *
 * RANGE runs continue when the next code point follows immediately
 * and has the same lower - upper offset.  EVEN_ODD runs continue when
 * the offset is 1 and the next upper value lies two past the last one
 * recorded.  FULL entries are never extended.
 *
 * Returns 1 when 'raw' extends 'fold', 0 otherwise.
 */
static int
foldExtends (FcCaseFold *fold, CaseFoldRaw *raw)
{
    short	delta = (short) (raw->lower[0] - raw->upper);

    if (fold->method == FC_CASE_FOLD_RANGE)
	return delta == fold->offset &&
	       raw->upper == fold->upper + fold->count;

    if (fold->method == FC_CASE_FOLD_EVEN_ODD)
	return delta == 1 &&
	       raw->upper == fold->upper + fold->count + 1;

    return 0;
}
/*
 * Source text used for a fold method in the generated table.  The
 * known names carry their trailing comma so that the caller can pad
 * name and comma together as one column.
 */
static char *
case_fold_method_name (FcChar16 method)
{
    if (method == FC_CASE_FOLD_RANGE)
	return "FC_CASE_FOLD_RANGE,";
    if (method == FC_CASE_FOLD_EVEN_ODD)
	return "FC_CASE_FOLD_EVEN_ODD,";
    if (method == FC_CASE_FOLD_FULL)
	return "FC_CASE_FOLD_FULL,";
    return "unknown";
}
static void
dump (void)
{
int i;
printf ( "#define FC_NUM_CASE_FOLD %d\n", nfolds);
printf ( "#define FC_NUM_CASE_FOLD_CHARS %d\n", nfoldChars);
printf ( "#define FC_MAX_CASE_FOLD_CHARS %d\n", maxFoldChars);
printf ( "#define FC_MAX_CASE_FOLD_EXPAND %d\n", maxExpand);
printf ( "#define FC_MIN_FOLD_CHAR 0x%08x\n", minFoldChar);
printf ( "#define FC_MAX_FOLD_CHAR 0x%08x\n", maxFoldChar);
printf ( "\n");
/*
* Dump out ranges
*/
printf ("static const FcCaseFold fcCaseFold[FC_NUM_CASE_FOLD] = {\n");
for (i = 0; i < nfolds; i++)
{
printf (" { 0x%08x, %-22s 0x%04x, %6d },\n",
folds[i].upper, case_fold_method_name (folds[i].method),
folds[i].count, folds[i].offset);
}
printf ("};\n\n");
/*
* Dump out "other" values
*/
printf ("static const FcChar8 fcCaseFoldChars[FC_NUM_CASE_FOLD_CHARS] = {\n");
for (i = 0; i < nfoldChars; i++)
{
printf ("0x%02x", foldChars[i]);
if (i != nfoldChars - 1)
{
if ((i & 0xf) == 0xf)
printf (",\n");
else
printf (",");
}
}
printf ("\n};\n");
}
/*
* Read the standard Unicode CaseFolding.txt file
*/
/* Characters that separate the fields of a case folding line */
#define SEP "; \t\n"

/*
 * Parse one line of the case folding file into 'raw'.
 *
 * A usable line holds a hexadecimal code point, a class letter found
 * in caseFoldClassMap and at least one hexadecimal result code point;
 * a token starting with '#' ends the line.  At most MAX_OUT result
 * values are stored.
 *
 * 'line' is modified in place (strtok), and strtok's internal state
 * is used, so calls must not be interleaved with other strtok users.
 *
 * Returns 1 when 'raw' has been filled in, 0 when the line is not a
 * folding entry.
 */
static int
parseRaw (char *line, CaseFoldRaw *raw)
{
    char    *tok, *end;
    int	    i;

    /*
     * The <ctype.h> functions require a value representable as
     * unsigned char (or EOF); a plain char may be negative.
     */
    if (!isxdigit ((unsigned char) line[0]))
	return 0;
    /*
     * Get upper case value
     */
    tok = strtok (line, SEP);
    if (!tok || tok[0] == '#')
	return 0;
    raw->upper = strtol (tok, &end, 16);
    if (end == tok)
	return 0;
    /*
     * Get class
     */
    tok = strtok (NULL, SEP);
    if (!tok || tok[0] == '#')
	return 0;
    for (i = 0; caseFoldClassMap[i].name; i++)
	if (!strcmp (tok, caseFoldClassMap[i].name))
	{
	    raw->class = caseFoldClassMap[i].class;
	    break;
	}
    /* Reached the terminating entry: unknown class letter */
    if (!caseFoldClassMap[i].name)
	return 0;
    /*
     * Get list of result characters
     */
    for (i = 0; i < MAX_OUT; i++)
    {
	tok = strtok (NULL, SEP);
	if (!tok || tok[0] == '#')
	    break;
	raw->lower[i] = strtol (tok, &end, 16);
	if (end == tok)
	    break;
    }
    if (i == 0)
	return 0;
    raw->nout = i;
    return 1;
}
/*
 * Read lines from 'in' until one parses as a folding entry.
 *
 * Returns 1 with 'raw' filled in, or 0 once fgets reports end of file
 * or a read error.
 */
static int
caseFoldReadRaw (FILE *in, CaseFoldRaw *raw)
{
    char    line[MAX_LINE];

    /*
     * fgets stores at most size - 1 characters plus the terminator,
     * so the full buffer size is passed, as main does for stdin.
     */
    while (fgets (line, sizeof (line), in))
    {
	if (parseRaw (line, raw))
	    return 1;
    }
    return 0;
}
/*
 * fc-case CaseFolding.txt < template > output
 *
 * Builds the fold tables from the case folding file named on the
 * command line, then copies the template from stdin to stdout,
 * emitting the generated tables in place of the first line that
 * starts with "@@@".
 *
 * Exits with status 0 on success and 1 when writing stdout failed;
 * usage, open, read and allocation failures go through panic().
 */
int
main (int argc, char **argv)
{
    FcCaseFold	*fold = 0;
    CaseFoldRaw	raw;
    int		i;
    FILE	*caseFile;
    char	line[MAX_LINE];
    int		expand;

    if (argc != 2)
	panic ("usage: fc-case CaseFolding.txt");
    caseFile = fopen (argv[1], "r");
    if (!caseFile)
	panic ("can't open case folding file");

    while (caseFoldReadRaw (caseFile, &raw))
    {
	/*
	 * The first entry read supplies the minimum and the last one
	 * the maximum, which is only right for input listed in
	 * ascending code point order.  Entries of every class count.
	 */
	if (!minFoldChar)
	    minFoldChar = raw.upper;
	maxFoldChar = raw.upper;
	switch (raw.class) {
	case CaseFoldCommon:
	case CaseFoldFull:
	    if (raw.nout == 1)
	    {
		/* One-to-one folding: extend the current run or start one */
		if (fold && foldExtends (fold, &raw))
		    fold->count = raw.upper - fold->upper + 1;
		else
		{
		    fold = addFold ();
		    fold->upper = raw.upper;
		    fold->offset = raw.lower[0] - raw.upper;
		    if (fold->offset == 1)
			fold->method = FC_CASE_FOLD_EVEN_ODD;
		    else
			fold->method = FC_CASE_FOLD_RANGE;
		    fold->count = 1;
		}
		expand = utf8_size (raw.lower[0]) - utf8_size(raw.upper);
	    }
	    else
	    {
		/*
		 * One-to-many folding: the result is stored as UTF-8 in
		 * foldChars; offset and count locate the bytes
		 */
		fold = addFold ();
		fold->upper = raw.upper;
		fold->method = FC_CASE_FOLD_FULL;
		fold->offset = nfoldChars;
		for (i = 0; i < raw.nout; i++)
		    addChar (raw.lower[i]);
		fold->count = nfoldChars - fold->offset;
		if (fold->count > maxFoldChars)
		    maxFoldChars = fold->count;
		expand = fold->count - utf8_size (raw.upper);
	    }
	    /* Track the largest growth in UTF-8 length caused by folding */
	    if (expand > maxExpand)
		maxExpand = expand;
	    break;
	case CaseFoldSimple:
	    break;
	case CaseFoldTurkic:
	    break;
	}
    }
    /*
     * The reader returns 0 for both end of file and read errors; do
     * not generate tables from a partially read file.
     */
    if (ferror (caseFile))
	panic ("error reading case folding file");
    fclose (caseFile);

    /*
     * Scan the input until the marker is found
     */
    while (fgets (line, sizeof (line), stdin))
    {
	if (!strncmp (line, "@@@", 3))
	    break;
	fputs (line, stdout);
    }

    /*
     * Dump these tables
     */
    dump ();

    /*
     * And flush out the rest of the input file
     */
    while (fgets (line, sizeof (line), stdin))
	fputs (line, stdout);

    fflush (stdout);
    /*
     * ferror only promises zero or nonzero; map it to a definite
     * status rather than handing the raw value to exit
     */
    exit (ferror (stdout) ? 1 : 0);
}
/*
* $Id$
*
* Copyright © 2003 Keith Packard
*
* Permission to use, copy, modify, distribute, and sell this software and its
* documentation for any purpose is hereby granted without fee, provided that
* the above copyright notice appear in all copies and that both that
* copyright notice and this permission notice appear in supporting
* documentation, and that the name of Keith Packard not be used in
* advertising or publicity pertaining to distribution of the software without
* specific, written prior permission. Keith Packard makes no
* representations about the suitability of this software for any purpose. It
* is provided "as is" without express or implied warranty.
*
* KEITH PACKARD DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
* INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
* EVENT SHALL KEITH PACKARD BE LIABLE FOR ANY SPECIAL, INDIRECT OR
* CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
* DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
* TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
* PERFORMANCE OF THIS SOFTWARE.
*/
@@@
......@@ -73,8 +73,9 @@
-->
<alias>
<family>Bitstream Vera Serif</family>
<family>Times</family>
<family>Times New Roman</family>
<family>Thorndale AMT</family>
<family>Times</family>
<family>Nimbus Roman No9 L</family>
<family>Luxi Serif</family>
<family>Kochi Mincho</family>
......@@ -92,6 +93,7 @@
<family>Helvetica</family>
<family>Arial</family>
<family>Verdana</family>
<family>Albany AMT</family>
<family>Nimbus Sans L</family>
<family>Luxi Sans</family>
<family>Kochi Gothic</family>
......@@ -111,6 +113,7 @@
<family>Courier New</family>
<family>Andale Mono</family>
<family>Luxi Mono</family>
<family>Cumberland AMT</family>
<family>Nimbus Mono L</family>
<family>NSimSun</family>
<family>FreeMono</family>
......@@ -134,6 +137,48 @@
</edit>
</match>
<!--
URW provides metric and shape compatible fonts for these 3 Adobe families
Mark these as effective replacements by binding the replacement
family names strongly
-->
<match target="pattern">
<test name="family"><string>Times</string></test>
<edit name="family" mode="append" binding="same">
<string>Nimbus Roman No9 L</string>
</edit>
</match>
<match target="pattern">
<test name="family"><string>Helvetica</string></test>
<edit name="family" mode="append" binding="same">
<string>Nimbus Sans L</string>
</edit>
</match>
<match target="pattern">
<test name="family"><string>Courier</string></test>
<edit name="family" mode="append" binding="same">
<string>Nimbus Mono L</string>
</edit>
</match>
<!--
AMT provides metric and shape compatible fonts for these three web font
families. Bind them weakly as matching here is not as important as
with web fonts.
-->
<alias>
<family>Times New Roman</family>
<accept><family>Thorndale AMT</family></accept>
</alias>
<alias>
<family>Arial</family>
<accept><family>Albany AMT</family></accept>
</alias>
<alias>
<family>Courier New</family>
<accept><family>Cumberland AMT</family></accept>
</alias>
<!--
Some Asian fonts misadvertise themselves as monospaced when
in fact they are dual-spaced (half and full). This makes
......@@ -214,22 +259,6 @@
<include ignore_missing="yes">conf.d</include>
<include ignore_missing="yes">local.conf</include>
<!--
Alias well known Type1 font names to metric equivalent TrueType fonts
-->
<alias>
<family>Times</family>
<accept><family>Times New Roman</family></accept>
</alias>
<alias>
<family>Helvetica</family>
<accept><family>Arial</family></accept>
</alias>
<alias>
<family>Courier</family>
<accept><family>Courier New</family></accept>
</alias>
<!--
Provide required aliases for standard names
-->
......@@ -238,8 +267,9 @@
<prefer>
<family>Bitstream Vera Serif</family>
<family>Times New Roman</family>
<family>Nimbus Roman No9 L</family>
<family>Thorndale AMT</family>
<family>Luxi Serif</family>
<family>Nimbus Roman No9 L</family>
<family>Times</family>
<family>Frank Ruehl</family>
<family>Kochi Mincho</family>
......@@ -254,9 +284,10 @@
<prefer>
<family>Bitstream Vera Sans</family>
<family>Verdana</family>
<family>Nimbus Sans L</family>
<family>Luxi Sans</family>
<family>Arial</family>
<family>Albany AMT</family>
<family>Luxi Sans</family>
<family>Nimbus Sans L</family>
<family>Helvetica</family>
<family>Nachlieli</family>
<family>Kochi Gothic</family>
......@@ -273,8 +304,10 @@
<family>Bitstream Vera Sans Mono</family>
<family>Andale Mono</family>
<family>Courier New</family>
<family>Cumberland AMT</family>
<family>Luxi Mono</family>
<family>Nimbus Mono L</family>
<family>Courier</family>
<family>Miriam Mono</family>
<family>Kochi Gothic</family>
<family>AR PL KaitiM GB</family>
......
......@@ -231,6 +231,37 @@ typedef struct _FcGlyphName {
FcChar8 name[1]; /* name extends beyond struct */
} FcGlyphName;
/*
* To perform case-insensitive string comparisons, a table
* is used which holds three different kinds of folding data.
*
* The first is a range of upper case values mapping to a range
* of their lower case equivalents. Within each range, the offset
* between upper and lower case is constant.
*
* The second is a range of upper case values which are interleaved
* with their lower case equivalents.
*
* The third is a set of raw unicode values mapping to a list
* of unicode values for comparison purposes. This allows conversion
* of ß to "ss" so that SS, ss and ß all match. A separate array
* holds the list of unicode values for each entry.
*
* These are packed into a single table. Using a binary search,
* the appropriate entry can be located.
*/
/*
 * Folding methods, stored in the two-bit 'method' field of FcCaseFold
 */
#define FC_CASE_FOLD_RANGE 0
#define FC_CASE_FOLD_EVEN_ODD 1
#define FC_CASE_FOLD_FULL 2
/*
 * One entry of the case folding table.  fc-case writes the table as
 * positional initializers in the order upper, method, count, offset,
 * so the member order here must not change.
 */
typedef struct _FcCaseFold {
/* first upper case code point covered by this entry */
FcChar32 upper;
/* one of the FC_CASE_FOLD_* values */
FcChar16 method : 2;
/*
 * RANGE and EVEN_ODD: last covered upper value - upper + 1;
 * FULL: number of UTF-8 bytes in the replacement text
 */
FcChar16 count : 14;
short offset; /* lower - upper for RANGE, table id for FULL */
} FcCaseFold;
#define FC_MAX_FILE_LEN 4096
/*
......@@ -746,4 +777,7 @@ FcStrUsesHome (const FcChar8 *s);
FcChar8 *
FcStrLastSlash (const FcChar8 *path);
FcChar32
FcStrHashIgnoreCase (const FcChar8 *s);
#endif /* _FC_INT_H_ */
......@@ -219,20 +219,6 @@ FcListPatternMatchAny (const FcPattern *p,
return FcTrue;
}
/*
 * Hash a nul-terminated string, passing every byte through FcToLower
 * before it is mixed into the running value.
 */
static FcChar32
FcListStringHash (const FcChar8 *s)
{
    FcChar32	hash = 0;

    for (; *s; s++)
    {
	FcChar8	folded = *s;

	folded = FcToLower (folded);
	hash = ((hash << 3) ^ (hash >> 3)) ^ folded;
    }
    return hash;
}
static FcChar32
FcListMatrixHash (const FcMatrix *m)
{
......@@ -255,7 +241,7 @@ FcListValueHash (FcValue v)
case FcTypeDouble:
return (FcChar32) (int) v.u.d;
case FcTypeString:
return FcListStringHash (v.u.s);
return FcStrHashIgnoreCase (v.u.s);
case FcTypeBool:
return (FcChar32) v.u.b;
case FcTypeMatrix:
......
......@@ -63,16 +63,147 @@ FcStrFree (FcChar8 *s)
free (s);
}
#include "../fc-case/fccase.h"
/*
 * Number of consecutive code points, starting at 'upper', that a fold
 * table entry covers: FULL entries describe a single code point, the
 * other methods describe 'count' of them.
 */
#define FcCaseFoldUpperCount(cf) \
    ((cf)->method == FC_CASE_FOLD_FULL ? 1 : (cf)->count)

#define FC_STR_CANON_BUF_LEN	1024

/*
 * State for walking a string while applying case folding
 */
typedef struct _FcCaseWalker {
    const FcChar8   *read;	/* cleared by FcStrCaseWalkerInit; presumably
				 * the read position within utf8[] -- confirm */
    const FcChar8   *src;	/* string being walked */
    int		    len;	/* strlen of src when the walker is set up */
    FcChar8	    utf8[FC_MAX_CASE_FOLD_CHARS + 1];
				/* room for the longest full folding plus one
				 * byte, presumably a terminator -- confirm */
} FcCaseWalker;
/*
 * Prepare 'w' to walk the nul-terminated string 'src': record the
 * string and its length in bytes and clear the 'read' pointer.
 */
static void
FcStrCaseWalkerInit (const FcChar8 *src, FcCaseWalker *w)
{
    w->src = src;
    w->read = 0;
    /*
     * strlen takes a const char *, while FcChar8 strings are passed
     * around as a different character type, so cast explicitly; the
     * size_t result is narrowed to the int field deliberately.
     */
    w->len = (int) strlen ((const char *) src);
}
static FcChar8
FcStrCaseWalkerLong (FcCaseWalker *w, FcChar8 r)
{
FcChar32 ucs4;
int slen;
slen = FcUtf8ToUcs4 (w->src - 1, &ucs4, w->len);
if (slen <= 0)
return r;
if (FC_MIN_FOLD_CHAR <= ucs4 && ucs4 <= FC_MAX_FOLD_CHAR)
{
int min = 0;
int max = FC_NUM_CASE_FOLD;
while (min <= max)
{
int mid = (min + max) >> 1;
FcChar32 low = fcCaseFold[mid].upper;
FcChar32 high = low + FcCaseFoldUpperCount (&fcCaseFold[mid]);
if (high <= ucs4)
min = mid + 1;
else if (ucs4 < low)
max = mid - 1;
else
{
const FcCaseFold *fold = &fcCaseFold[mid];
int dlen;
switch (fold->method) {
case FC_CASE_FOLD_EVEN_ODD:
if ((ucs4 & 1)