Commit c1382a3d authored by Keith Packard

Add fc-lang program to generate language coverage tables

parent 08440706
#ifdef UseInstalled
/* building outside the tree, use private defines */
#include "../local.def"
#endif

INCLUDES=$(FREETYPE2INCLUDES) $(LIBXML2INCLUDES) -I.. -I../src

LOCAL_LIBRARIES=FontconfigClientLibs
DEPLIBS=FontconfigClientDepLibs

SRCS=fc-lang.c
OBJS=fc-lang.o

#
# TARG is the generated C source, TMPL is the template that fc-lang
# reads on stdin and copies through with the tables spliced in
#
TARG=fclang.c
TMPL=fclang.tmpl.c

#
# Basic ISO 639-1 two letter language names
#
ORTH1=ab.orth ar.orth az.orth ba.orth be.orth bg.orth br.orth ca.orth co.orth\
cs.orth da.orth de.orth el.orth en.orth eo.orth es.orth et.orth eu.orth\
fi.orth fo.orth fr.orth fy.orth ga.orth gd.orth gl.orth he.orth hr.orth\
hu.orth hy.orth is.orth it.orth ja.orth ka.orth kk.orth kl.orth ko.orth\
la.orth lt.orth lv.orth mk.orth mo.orth mt.orth nl.orth no.orth oc.orth\
pl.orth pt.orth rm.orth ro.orth ru.orth sh.orth sk.orth sl.orth sq.orth\
sr.orth sv.orth th.orth tr.orth uk.orth vo.orth yi.orth zh_cn.orth\
zh_tw.orth

#
# ISO 639-2 adds many more three letter language names
#
ORTH2=chr.orth

ORTH=$(ORTH1) $(ORTH2)

all:: $(TARG)

clean::
	$(RM) $(TARG) $(TARG).tmp

#
# Generate into a scratch file and rename on success, so that a failed
# fc-lang run never leaves a truncated but up-to-date looking $(TARG)
#
$(TARG): fc-lang $(ORTH) $(TMPL)
	$(RM) $(TARG).tmp
	./fc-lang $(ORTH) < $(TMPL) > $(TARG).tmp
	mv -f $(TARG).tmp $(TARG)

ComplexProgramTarget(fc-lang)
LinkBuildBinary(ProgramTargetName(fc-lang))
# Abkhazia (AB)
00ab
00bb
0401
040f
0410-044f
0451
045f
049e-049f
04a6-04a9
04ac-04ad
04b2-04b7
04bc-04bf
04d8
04d9
04e0-04e1
#2039-203a # angle quotes
# Arabic (AR)
060C # ARABIC COMMA
061B # ARABIC SEMICOLON
061F # ARABIC QUESTION MARK
0621 # ARABIC LETTER HAMZA
0622 # ARABIC LETTER ALEF WITH MADDA ABOVE
0623 # ARABIC LETTER ALEF WITH HAMZA ABOVE
0624 # ARABIC LETTER WAW WITH HAMZA ABOVE
0625 # ARABIC LETTER ALEF WITH HAMZA BELOW
0626 # ARABIC LETTER YEH WITH HAMZA ABOVE
0627 # ARABIC LETTER ALEF
0628 # ARABIC LETTER BEH
0629 # ARABIC LETTER TEH MARBUTA
062A # ARABIC LETTER TEH
062B # ARABIC LETTER THEH
062C # ARABIC LETTER JEEM
062D # ARABIC LETTER HAH
062E # ARABIC LETTER KHAH
062F # ARABIC LETTER DAL
0630 # ARABIC LETTER THAL
0631 # ARABIC LETTER REH
0632 # ARABIC LETTER ZAIN
0633 # ARABIC LETTER SEEN
0634 # ARABIC LETTER SHEEN
0635 # ARABIC LETTER SAD
0636 # ARABIC LETTER DAD
0637 # ARABIC LETTER TAH
0638 # ARABIC LETTER ZAH
0639 # ARABIC LETTER AIN
063A # ARABIC LETTER GHAIN
0640 # ARABIC TATWEEL
0641 # ARABIC LETTER FEH
0642 # ARABIC LETTER QAF
0643 # ARABIC LETTER KAF
0644 # ARABIC LETTER LAM
0645 # ARABIC LETTER MEEM
0646 # ARABIC LETTER NOON
0647 # ARABIC LETTER HEH
0648 # ARABIC LETTER WAW
0649 # ARABIC LETTER ALEF MAKSURA
064A # ARABIC LETTER YEH
064B # ARABIC FATHATAN
064C # ARABIC DAMMATAN
064D # ARABIC KASRATAN
064E # ARABIC FATHA
064F # ARABIC DAMMA
0650 # ARABIC KASRA
0651 # ARABIC SHADDA
0652 # ARABIC SUKUN
# Azerbaijani (AZ)
0040-005a
0060-007a
00c4
00c7
00d6
00dc
00e4
00e7
00f6
00fc
011e-011f
0130-0131
015e-015f
018f
0259
02bc
0408
0410-044f
0458
0492-0493
049c-049d
04ae-04af
04b8-04bb
04d8-04d9
04e8-04e9
# Bashkir (BA)
00ab
00bb
0401
0410-044f
0451
0492-0493
0498-0499
04a0-04a3
04aa-04ab
04ae-04af
04d8-04d9
04e8-04e9
#2018-2019 # single quotes
#201c-201d # double quotes
#2039-203a # angle quotes
# Byelorussian (BE)
00ab
00bb
0401
0406
040e
0410-044f
0451
0456
045e
#2039-203a # angle quotes
# Bulgarian (BG)
0400
040d
0410-042a
042c
042e-044a
044c
044e-044f
0450
045d
0462-0463
046a-046b
# Breton (BR)
0027
0040-005a
0060-007a
00ab
00bb
00c2
00ca
00d1
00d9
00dc
00e2
00ea
00f1
00f9
00fc
#2019-201a # single quote and comma
# Catalan (CA)
0040-005a
0060-007a
00b7
00c0
00c7
00c8-00c9
00cd
00cf
00d2-00d3
00da
00dc
00e0
00e7
00e8-00e9
00ed
00ef
00f2-00f3
00fa
00fc
013f-0140
#2018-2019 # single quotes
#201c-201d # double quotes
# Cherokee (chr)
13a0-13f4
# Corsican (CO)
include fr.orth
# Czech (CS)
0040-005a
0060-007a
00c1
00c4
00c9
00cd
00d3
00d6
00da
00dc-00dd
00e1
00e4
00e9
00ed
00f3
00f6
00fa
00fc-00fd
010c-010f
011a-011b
0147-0148
0158-0159
0160-0161
0164-0165
016e-016f
017d-017e
# Danish (DA)
0040-005a
0060-007a
00ab
00bb
00c0-00c2
00c4-00cb
00cd
00d0
00d3-00d4
00d6
00d8
00da
00dc-00de
00e0-00e2
00e4-00eb
00ed
00f0
00f3-00f4
00f6
00f8
00fa
00fc-00fe
0152-0153
01fa-01ff
#2039-203a # angle quotes
# German (DE)
00ab
00bb
0040-005a
0060-007a
00c4
00d6
00dc
00df
00e4
00f6
00fc
#2018 # single quotes
#201a # single quotes
#201c # double quotes
#201e # double quotes
#2039-203a # angle quotes
# Greek (EL)
0374-0375
037a
037e
0384-038a
038c
038e-03a1
03a3-03ce
03d7
03da-03e1
#1f00-1f15 # only for polytonic orthography below...
#1f18-1f1d
#1f20-1f45
#1f48-1f4d
#1f50-1f57
#1f59
#1f5b
#1f5d
#1f5f-1f7d
#1f80-1fb4
#1fb6-1fc4
#1fc6-1fd3
#1fd6-1fdb
#1fdd-1fef
#1ff2-1ff4
#1ff6-1ffe
# English (EN)
0040-005a
0060-007a
00c0
00c7-00cb
00cf
00d1
00d4
00d6
00e0
00e7-00eb
00ef
00f1
00f4
00f6
#2018-2019 # single quotes
#201c-201d # double quotes
# Esperanto (EO)
0040-005a
0060-007a
0108-0109
011c-011d
0124-0125
0134-0135
015c-015d
016c-016d
# Spanish (ES)
0040-005a
0060-007a
00a1
00bf
00c1
00c9
00cd
00d1
00d3
00da
00dc
00e1
00e9
00ed
00f1
00f3
00fa
00fc
# Estonian (ET)
0040-005a
0060-007a
00c4
00d5-00d6
00dc
00e4
00f5-00f6
00fc
0160-0161
017d-017e
#2018 # single quote
#201a # single quote
#201c # double quote
#201e # double quote
# Basque (EU)
0040-005a
0060-007a
00d1
00dc
00f1
00fc
0154-0155
/*
* $XFree86$
*
* Copyright 2002 Keith Packard, member of The XFree86 Project, Inc.
*
* Permission to use, copy, modify, distribute, and sell this software and its
* documentation for any purpose is hereby granted without fee, provided that
* the above copyright notice appear in all copies and that both that
* copyright notice and this permission notice appear in supporting
* documentation, and that the name of Keith Packard not be used in
* advertising or publicity pertaining to distribution of the software without
* specific, written prior permission. Keith Packard makes no
* representations about the suitability of this software for any purpose. It
* is provided "as is" without express or implied warranty.
*
* KEITH PACKARD DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
* INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
* EVENT SHALL KEITH PACKARD BE LIABLE FOR ANY SPECIAL, INDIRECT OR
* CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
* DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
* TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
* PERFORMANCE OF THIS SOFTWARE.
*/
#include "fcint.h"
/*
* fc-lang
*
* Read a set of language orthographies and build C declarations for
* charsets which can then be used to identify which languages are
* supported by a given font. Note that it would be nice if
* this could be done while compiling the library, but this
* code uses a number of routines from the library. It's
* expedient to just ship the pre-built version along with the
* source orthographies.
*/
/*
 * Print a diagnostic of the form "file:lineno: msg" followed by a
 * newline on stderr, then terminate the program with exit status 1.
 * This function does not return.
 */
static void
fatal (char *file, int lineno, char *msg)
{
    FILE    *err = stderr;

    fprintf (err, "%s:%d: ", file, lineno);
    fputs (msg, err);
    fputc ('\n', err);
    exit (1);
}
/*
 * Read the next meaningful line from 'f' into 'line'.
 *
 * 'line' must have room for at least 1024 bytes.  '*lineno' is
 * incremented once for every physical line consumed, including the
 * skipped ones.  Everything from the first '#' onwards is cut off.
 * Lines that are then empty, start with newline, carriage return or
 * ctrl-Z (\032), or contain nothing but white space are skipped.
 *
 * Returns 'line', or 0 when fgets reports end of file or an error.
 *
 * Skipping is done with a loop rather than recursion so that a long run
 * of comment lines cannot grow the stack.
 */
static char *
get_line (FILE *f, char *line, int *lineno)
{
    char    *hash;

    for (;;)
    {
        if (!fgets (line, 1024, f))
            return 0;
        ++(*lineno);
        hash = strchr (line, '#');
        if (hash)
            *hash = '\0';
        if (line[0] == '\0' || line[0] == '\n' ||
            line[0] == '\032' || line[0] == '\r')
            continue;
        /* an indented comment leaves only blanks behind; skip those too */
        if (line[strspn (line, " \t\r\n")] == '\0')
            continue;
        return line;
    }
}
/*
* build a single charset from a source file
*
* The file format is quite simple, either
* a single hex value or a pair separated with a dash
*
* Comments begin with '#'
*/
static FcCharSet *
scan (FILE *f, char *file)
{
FcCharSet *c = 0;
FcCharSet *n;
int start, end, ucs4;
char line[1024];
int lineno = 0;
while (get_line (f, line, &lineno))
{
if (!strncmp (line, "include", 7))
{
file = strchr (line, ' ');
while (*file == ' ')
file++;
end = strlen (file);
if (file[end-1] == '\n')
file[end-1] = '\0';
f = fopen (file, "r");
if (!f)
fatal (file, 0, "can't open");
c = scan (f, file);
fclose (f);
return c;
}
if (strchr (line, '-'))
{
if (sscanf (line, "%x-%x", &start, &end) != 2)
fatal (file, lineno, "parse error");
}
else
{
if (sscanf (line, "%x", &start) != 1)
fatal (file, lineno, "parse error");
end = start;
}
if (!c)
c = FcCharSetCreate ();
for (ucs4 = start; ucs4 <= end; ucs4++)
{
if (!FcCharSetAddChar (c, ucs4))
fatal (file, lineno, "out of memory");
}
}
n = FcCharSetFreeze (c);
FcCharSetDestroy (c);
return n;
}
/*
* Convert a file name into a name suitable for C declarations
*/
/*
 * Derive the C identifier stem for an orthography file.
 *
 * Any leading directory part (up to the last '/') is dropped, then the
 * name is cut at the first '.' that follows; a name without '.' is
 * used whole.  "ab.orth" and "../orth/ab.orth" both give "ab".
 *
 * Returns a freshly malloc'ed string owned by the caller.  Exits with a
 * diagnostic when memory cannot be allocated.
 */
static char *
get_name (char *file)
{
    char    *base;
    char    *dot;
    char    *name;
    size_t  len;

    /* dots inside directory components must not truncate the name */
    base = strrchr (file, '/');
    base = base ? base + 1 : file;
    dot = strchr (base, '.');
    len = dot ? (size_t) (dot - base) : strlen (base);
    name = malloc (len + 1);
    if (!name)
    {
        fprintf (stderr, "%s:%d: %s\n", file, 0, "out of memory");
        exit (1);
    }
    memcpy (name, base, len);
    name[len] = '\0';
    return name;
}
/*
* Convert a C name into a language name
*/
/*
 * Turn a C identifier stem into a language tag: upper case letters are
 * lowered, '_' becomes '-', and blanks are dropped ("zh_CN" -> "zh-cn").
 *
 * Returns a freshly malloc'ed string owned by the caller.  Exits with a
 * diagnostic when memory cannot be allocated.
 */
static char *
get_lang (char *name)
{
    char            *lang = malloc (strlen (name) + 1);
    char            *l = lang;
    unsigned char   c;

    if (!lang)
    {
        fprintf (stderr, "%s:%d: %s\n", name, 0, "out of memory");
        exit (1);
    }
    /*
     * The ctype functions are only defined for unsigned char values
     * (and EOF), so the byte is held unsigned.
     */
    while ((c = (unsigned char) *name++))
    {
        if (isupper (c))
            c = (unsigned char) tolower (c);
        if (c == '_')
            c = '-';
        if (c == ' ')
            continue;
        *l++ = (char) c;
    }
    *l = '\0';
    return lang;
}
/*
 * fc-lang orth-file... < template > output
 *
 * Scans every orthography file named on the command line into a frozen
 * charset, gathers the distinct leaf pointers used by those sets, and
 * then copies the template from stdin to stdout.  When a line starting
 * with "@@@" is met, the generated tables (leaves, per-language leaf and
 * number arrays, and fcLangCharSets) are written in its place; the rest
 * of the template follows unchanged.
 *
 * Exits 0 on success and 1 when writing stdout failed; other errors go
 * through fatal().
 */
int
main (int argc, char **argv)
{
    FcCharSet   *sets[1024];
    char        *names[1024];
    FILE        *f;
    int         i = 0;
    FcCharLeaf  **leaves, **sleaves;
    int         total_leaves = 0;
    int         l, sl, tl;
    char        line[1024];
    char        *lang;
    const int   max_sets = (int) (sizeof (sets) / sizeof (sets[0])) - 1;

    (void) argc;
    while (*++argv)
    {
        /* one slot is kept free for the terminating null entry */
        if (i >= max_sets)
            fatal (*argv, 0, "too many orthography files");
        f = fopen (*argv, "r");
        if (!f)
            fatal (*argv, 0, strerror (errno));
        sets[i] = scan (f, *argv);
        names[i] = get_name (*argv);
        total_leaves += sets[i]->num;
        i++;
        fclose (f);
    }
    sets[i] = 0;
    /* never ask for zero bytes: malloc may then return a null pointer */
    leaves = malloc ((total_leaves > 0 ? total_leaves : 1) *
                     sizeof (FcCharLeaf *));
    if (!leaves)
        fatal ("fc-lang", 0, "out of memory");
    tl = 0;
    /*
     * Find unique leaves (compared by pointer)
     */
    for (i = 0; sets[i]; i++)
    {
        sleaves = sets[i]->leaves;
        for (sl = 0; sl < sets[i]->num; sl++)
        {
            for (l = 0; l < tl; l++)
                if (leaves[l] == sleaves[sl])
                    break;
            if (l == tl)
                leaves[tl++] = sleaves[sl];
        }
    }
    /*
     * Scan the input until the marker is found
     */
    while (fgets (line, sizeof (line), stdin))
    {
        if (!strncmp (line, "@@@", 3))
            break;
        fputs (line, stdout);
    }
    printf ("/* total size: %d unique leaves: %d */\n\n",
            total_leaves, tl);
    /*
     * Dump leaves
     */
    printf ("static const FcCharLeaf leaves[%d] = {\n", tl);
    for (l = 0; l < tl; l++)
    {
        printf (" { { /* %d */", l);
        for (i = 0; i < 256/32; i++)
        {
            if (i % 4 == 0)
                printf ("\n ");
            printf (" 0x%08x,", leaves[l]->map[i]);
        }
        printf ("\n } },\n");
    }
    printf ("};\n\n");
    printf ("#define L(n) ((FcCharLeaf *) &leaves[n])\n\n");
    /*
     * Dump arrays
     */
    for (i = 0; sets[i]; i++)
    {
        int n;

        printf ("static const FcCharLeaf *leaves_%s[%d] = {\n",
                names[i], sets[i]->num);
        for (n = 0; n < sets[i]->num; n++)
        {
            if (n % 8 == 0)
                printf (" ");
            /* map the leaf pointer back to its index in the unique table */
            for (l = 0; l < tl; l++)
                if (leaves[l] == sets[i]->leaves[n])
                    break;
            if (l == tl)
                fatal (names[i], 0, "can't find leaf");
            printf (" L(%3d),", l);
            if (n % 8 == 7)
                printf ("\n");
        }
        if (n % 8 != 0)
            printf ("\n");
        printf ("};\n\n");
        printf ("static const FcChar16 numbers_%s[%d] = {\n",
                names[i], sets[i]->num);
        for (n = 0; n < sets[i]->num; n++)
        {
            if (n % 8 == 0)
                printf (" ");
            printf (" 0x%04x,", sets[i]->numbers[n]);
            if (n % 8 == 7)
                printf ("\n");
        }
        if (n % 8 != 0)
            printf ("\n");
        printf ("};\n\n");
    }
    printf ("#undef L\n\n");
    /*
     * Dump sets
     */
    printf ("static const FcLangCharSet fcLangCharSets[] = {\n");
    for (i = 0; sets[i]; i++)
    {
        lang = get_lang (names[i]);
        printf (" { (FcChar8 *) \"%s\",\n"
                " { 1, FcTrue, %d, "
                "(FcCharLeaf **) leaves_%s, "
                "(FcChar16 *) numbers_%s } },\n",
                lang,
                sets[i]->num, names[i], names[i]);
        free (lang);
    }
    printf ("};\n\n");
    /*
     * Copy the remainder of the template
     */
    while (fgets (line, sizeof (line), stdin))
        fputs (line, stdout);
    free (leaves);
    /* report any write failure as a plain 0/1 exit status */
    if (fflush (stdout) != 0 || ferror (stdout))
        exit (1);
    exit (0);
}
.\"
.\" Copyright © 2002 Keith Packard, member of The XFree86 Project, Inc.
.\"
.\" Permission to use, copy, modify, distribute, and sell this software and its
.\" documentation for any purpose is hereby granted without fee, provided that
.\" the above copyright notice appear in all copies and that both that
.\" copyright notice and this permission notice appear in supporting
.\" documentation, and that the name of Keith Packard not be used in
.\" advertising or publicity pertaining to distribution of the software without
.\" specific, written prior permission. Keith Packard makes no
.\" representations about the suitability of this software for any purpose. It
.\" is provided "as is" without express or implied warranty.
.\"
.\" KEITH PACKARD DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
.\" INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
.\" EVENT SHALL KEITH PACKARD BE LIABLE FOR ANY SPECIAL, INDIRECT OR
.\" CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
.\" DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
.\" TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
.\" PERFORMANCE OF THIS SOFTWARE.
.\"
.\"
.\" $XFree86: xc/programs/fc-lang/fc-lang.man,v 1.3 2001/02/09 03:47:56 tsi Exp $
.\"
.TH FC-LANG 1 __vendorversion__
.SH NAME
fc-lang, fclang.c \- create a database of language orthographies
.SH SYNOPSIS
.B "fc-lang"
.RI [ language-coverage
\|.\|.\|. ]
.SH DESCRIPTION
.I Fc-lang
builds the fclang.c file used in the fontconfig library to automatically
determine language coverage for fonts which don't contain this information.
.SH FILES
.TP 15
.B fclang.tmpl.c
The template file in which the tables are inserted
.SH "SEE ALSO"
fontconfig(3)