/*
 * fontconfig/fc-lang/fc-lang.c
 *
 * Copyright © 2002 Keith Packard
 *
 * Permission to use, copy, modify, distribute, and sell this software and its
 * documentation for any purpose is hereby granted without fee, provided that
 * the above copyright notice appear in all copies and that both that
 * copyright notice and this permission notice appear in supporting
 * documentation, and that the name of the author(s) not be used in
 * advertising or publicity pertaining to distribution of the software without
 * specific, written prior permission.  The authors make no
 * representations about the suitability of this software for any purpose.  It
 * is provided "as is" without express or implied warranty.
 *
 * THE AUTHOR(S) DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
 * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
 * EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY SPECIAL, INDIRECT OR
 * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
 * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
 * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
 * PERFORMANCE OF THIS SOFTWARE.
 */

25 26
#include "fccharset.c"
#include "fcstr.c"
27
#include "fcserialize.c"
28 29 30 31 32 33

/*
 * fc-lang
 *
 * Read a set of language orthographies and build C declarations for
 * charsets which can then be used to identify which languages are
 * supported by a given font.  Note that this uses some utilities
 * from the fontconfig library, so the necessary files are simply
 * included in this compilation.  A couple of extra utility
 * functions are also needed in slightly modified form.
 */

40
FcPrivate void
41
FcCacheObjectReference (void *object FC_UNUSED)
42 43 44 45
{
}

FcPrivate void
46
FcCacheObjectDereference (void *object FC_UNUSED)
47 48 49
{
}

50
/*
 * Stand-in that performs no normalization: the argument is ignored and
 * NULL is always returned.
 */
FcPrivate FcChar8 *
FcLangNormalize (const FcChar8 *lang FC_UNUSED)
{
    FcChar8 *normalized = NULL;

    return normalized;
}

56 57
/*
 * Defined here, zero-initialized, and never written by this file.
 * NOTE(review): presumably referenced by the fontconfig sources included
 * at the top of this file -- confirm.
 */
int FcDebugVal;

58 59 60
FcChar8 *
FcConfigHome (void)
{
61
    return (FcChar8 *) getenv ("HOME");
62 63
}

64
/*
 * Print an error message on stderr and terminate with exit status 1.
 *
 * file    name used as the message prefix
 * lineno  line number to report; 0 leaves the line number out
 * msg     message text
 */
static void
fatal (const char *file, int lineno, const char *msg)
{
    if (lineno == 0)
	fprintf (stderr, "%s: %s\n", file, msg);
    else
	fprintf (stderr, "%s:%d: %s\n", file, lineno, msg);
    exit (1);
}

/*
 * Read the next meaningful line from f.
 *
 * Everything from the first '#' to the end of the line is discarded, then
 * leading and trailing whitespace is stripped.  Lines that end up empty
 * are skipped.
 *
 * f       stream to read from
 * buf     caller-provided storage; must hold at least 1024 bytes
 * lineno  incremented once for every physical line read, including
 *         skipped ones
 *
 * Returns a pointer into buf at the first non-blank character, or NULL
 * once fgets() reports end of file or an error.
 */
static char *
get_line (FILE *f, char *buf, int *lineno)
{
    for (;;)
    {
	char	*line = buf;
	char	*hash;
	size_t	end;

	if (!fgets (line, 1024, f))
	    return NULL;
	++(*lineno);

	/* drop the comment part, if any */
	hash = strchr (line, '#');
	if (hash)
	    *hash = '\0';

	/*
	 * The <ctype.h> classifiers are only defined for values
	 * representable as unsigned char (or EOF), so convert first.
	 */
	while (line[0] && isspace ((unsigned char) line[0]))
	    line++;
	end = strlen (line);
	while (end > 0 && isspace ((unsigned char) line[end - 1]))
	    line[--end] = '\0';

	/* a blank line leaves nothing behind after stripping */
	if (line[0] != '\0')
	    return line;
    }
}

102
/* Fallback directory consulted by scanopen(); NULL means no fallback. */
static char	*dir = 0;

/*
 * Open an orthography file for reading.
 *
 * The name is first tried as given.  If that fails and a fallback
 * directory is set in dir, "<dir>/<file>" is tried next.
 *
 * Returns the open stream, or NULL when neither attempt succeeds or when
 * the combined path does not fit in the local path buffer.
 */
static FILE *
scanopen (char *file)
{
    FILE    *f;

    f = fopen (file, "r");
    if (!f && dir)
    {
	char	path[1024];
	size_t	dir_len = strlen (dir);
	size_t	file_len = strlen (file);

	/*
	 * dir + '/' + file + NUL has to fit in path; report failure
	 * rather than writing past the end of the buffer.
	 */
	if (dir_len + 1 + file_len + 1 > sizeof (path))
	    return NULL;
	memcpy (path, dir, dir_len);
	path[dir_len] = '/';
	memcpy (path + dir_len + 1, file, file_len + 1);
	f = fopen (path, "r");
    }
    return f;
}

122 123 124 125 126 127 128 129 130
/*
 * build a single charset from a source file
 *
 * The file format is quite simple, either
 * a single hex value or a pair separated with a dash
 *
 * Comments begin with '#'
 */

131
static FcCharSet *
132
scan (FILE *f, char *file, FcCharSetFreezer *freezer)
133
{
134
    FcCharSet	    *c = 0;
135
    FcCharSet	    *n;
136
    FcBool	    del;
137
    int		    start, end, ucs4;
138 139
    char	    buf[1024];
    char	    *line;
140
    int		    lineno = 0;
141

142
    while ((line = get_line (f, buf, &lineno)))
143 144 145
    {
	if (!strncmp (line, "include", 7))
	{
146 147 148 149 150
	    FILE *included_f;
	    char *included_file;
	    included_file = strchr (line, ' ');
            if (!included_file)
                fatal (file, lineno,
151
                       "invalid syntax, expected: include filename");
152 153 154 155 156 157 158 159 160 161 162 163 164
	    while (isspace(*included_file))
		included_file++;
	    included_f = scanopen (included_file);
	    if (!included_f)
		fatal (included_file, 0, "can't open");
	    n = scan (included_f, included_file, freezer);
	    fclose (included_f);
	    if (!c)
		c = FcCharSetCreate ();
	    if (!FcCharSetMerge (c, n, NULL))
		fatal (file, lineno, "out of memory");
	    FcCharSetDestroy (n);
	    continue;
165
	}
166 167 168 169 170 171
	del = FcFalse;
	if (line[0] == '-')
	{
	  del = FcTrue;
	  line++;
	}
172 173 174 175 176
	if (strchr (line, '-'))
	{
	    if (sscanf (line, "%x-%x", &start, &end) != 2)
		fatal (file, lineno, "parse error");
	}
177 178 179 180 181
	else if (strstr (line, ".."))
	{
	    if (sscanf (line, "%x..%x", &start, &end) != 2)
		fatal (file, lineno, "parse error");
	}
182 183 184 185 186 187 188 189 190 191
	else
	{
	    if (sscanf (line, "%x", &start) != 1)
		fatal (file, lineno, "parse error");
	    end = start;
	}
	if (!c)
	    c = FcCharSetCreate ();
	for (ucs4 = start; ucs4 <= end; ucs4++)
	{
192
	    if (!((del ? FcCharSetDelChar : FcCharSetAddChar) (c, ucs4)))
193 194 195
		fatal (file, lineno, "out of memory");
	}
    }
Behdad Esfahbod's avatar
Behdad Esfahbod committed
196
    n = (FcCharSet *) FcCharSetFreeze (freezer, c);
197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230
    FcCharSetDestroy (c);
    return n;
}

/*
 * Convert a file name into a name suitable for C declarations.
 *
 * The result is a newly allocated copy of everything before the first
 * '.' in file, or of the whole string when it contains no '.'.  The
 * caller owns the returned memory.
 *
 * Running out of memory ends the program with the same message layout
 * fatal() produces for a zero line number.
 */
static char *
get_name (char *file)
{
    char    *name;
    char    *dot;
    size_t  len;

    dot = strchr (file, '.');
    len = dot ? (size_t) (dot - file) : strlen (file);
    name = malloc (len + 1);
    if (!name)
    {
	fprintf (stderr, "%s: %s\n", file, "out of memory");
	exit (1);
    }
    memcpy (name, file, len);
    name[len] = '\0';
    return name;
}

/*
 * Convert a C name into a language name.
 *
 * Upper-case letters are lowered, '_' becomes '-' and spaces are dropped.
 * The result is newly allocated and owned by the caller.
 *
 * Running out of memory ends the program with the same message layout
 * fatal() produces for a zero line number.
 */
static char *
get_lang (char *name)
{
    char    *lang = malloc (strlen (name) + 1);
    char    *l = lang;
    char    c;

    if (!lang)
    {
	fprintf (stderr, "%s: %s\n", name, "out of memory");
	exit (1);
    }
    while ((c = *name++))
    {
	/* <ctype.h> functions need an unsigned char value */
	if (isupper ((int) (unsigned char) c))
	    c = tolower ((int) (unsigned char) c);
	if (c == '_')
	    c = '-';
	if (c == ' ')
	    continue;
	*l++ = c;
    }
    *l = '\0';
    return lang;
}

243 244 245 246 247
/* One orthography file named on the command line. */
typedef struct _Entry {
    int id;	/* index among the file arguments, assigned in main() before the entries are sorted */
    char *file;	/* file name exactly as it appeared in argv */
} Entry;

248 249
static int compare (const void *a, const void *b)
{
250 251
    const Entry *as = a, *bs = b;
    return FcStrCmpIgnoreCase ((const FcChar8 *) as->file, (const FcChar8 *) bs->file);
252 253
}

254 255 256
/* Largest number of orthography files main() accepts. */
#define MAX_LANG	    1024
/* Number of 32-bit words needed for a bitmap with one bit per language. */
#define MAX_LANG_SET_MAP    ((MAX_LANG + 31) / 32)

257
/*
 * Set the bit numbered entries[i].id in map, an array of 32-bit words.
 * The macro refers to an array called `entries` that must be in scope
 * wherever it is expanded, and it evaluates i twice.
 */
#define BitSet(map, i)   ((map)[(entries[i].id)>>5] |= ((FcChar32) 1 << ((entries[i].id) & 0x1f)))
258

259
int
260
main (int argc FC_UNUSED, char **argv)
261
{
262
    static Entry	entries[MAX_LANG + 1];
263
    static FcCharSet	*sets[MAX_LANG];
264 265 266 267
    static int		duplicate[MAX_LANG];
    static int		country[MAX_LANG];
    static char		*names[MAX_LANG];
    static char		*langs[MAX_LANG];
268
    static int		off[MAX_LANG];
269
    FILE	*f;
270
    int		ncountry = 0;
271
    int		i = 0;
272
    int		nsets = 0;
Patrick Lam's avatar
Patrick Lam committed
273
    int		argi;
274
    FcCharLeaf	**leaves;
275
    int		total_leaves = 0;
276
    int		l, sl, tl, tn;
277 278
    static char		line[1024];
    static FcChar32	map[MAX_LANG_SET_MAP];
279
    int		num_lang_set_map;
280 281 282
    int		setRangeStart[26];
    int		setRangeEnd[26];
    FcChar8	setRangeChar;
283
    FcCharSetFreezer	*freezer;
284
    
285 286 287
    freezer = FcCharSetFreezerCreate ();
    if (!freezer)
	fatal (argv[0], 0, "out of memory");
Patrick Lam's avatar
Patrick Lam committed
288 289
    argi = 1;
    while (argv[argi])
290
    {
Patrick Lam's avatar
Patrick Lam committed
291
	if (!strcmp (argv[argi], "-d"))
292
	{
Patrick Lam's avatar
Patrick Lam committed
293 294
	    argi++;
	    dir = argv[argi++];
295 296
	    continue;
	}
297
	if (i == MAX_LANG)
Patrick Lam's avatar
Patrick Lam committed
298
	    fatal (argv[0], 0, "Too many languages");
299 300 301
	entries[i].id = i;
	entries[i].file = argv[argi++];
	i++;
302
    }
303 304
    entries[i].file = 0;
    qsort (entries, i, sizeof (Entry), compare);
305
    i = 0;
306
    while (entries[i].file)
307
    {
308
	f = scanopen (entries[i].file);
309
	if (!f)
310 311 312
	    fatal (entries[i].file, 0, strerror (errno));
	sets[i] = scan (f, entries[i].file, freezer);
	names[i] = get_name (entries[i].file);
313 314 315 316
	langs[i] = get_lang(names[i]);
	if (strchr (langs[i], '-'))
	    country[ncountry++] = i;

317 318 319 320
	total_leaves += sets[i]->num;
	i++;
	fclose (f);
    }
321
    nsets = i;
322 323 324 325 326 327 328 329 330 331 332
    sets[i] = 0;
    leaves = malloc (total_leaves * sizeof (FcCharLeaf *));
    tl = 0;
    /*
     * Find unique leaves
     */
    for (i = 0; sets[i]; i++)
    {
	for (sl = 0; sl < sets[i]->num; sl++)
	{
	    for (l = 0; l < tl; l++)
333
		if (leaves[l] == FcCharSetLeaf(sets[i], sl))
334 335
		    break;
	    if (l == tl)
336
		leaves[tl++] = FcCharSetLeaf(sets[i], sl);
337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352
	}
    }

    /*
     * Scan the input until the marker is found
     */
    
    while (fgets (line, sizeof (line), stdin))
    {
	if (!strncmp (line, "@@@", 3))
	    break;
	fputs (line, stdout);
    }
    
    printf ("/* total size: %d unique leaves: %d */\n\n",
	    total_leaves, tl);
353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370

    /*
     * Find duplicate charsets
     */
    duplicate[0] = -1;
    for (i = 1; sets[i]; i++)
    {
	int j;

	duplicate[i] = -1;
	for (j = 0; j < i; j++)
	    if (sets[j] == sets[i])
	    {
		duplicate[i] = j;
		break;
	    }
    }

371 372 373 374 375 376 377 378 379 380
    tn = 0;
    for (i = 0; sets[i]; i++) {
	if (duplicate[i] >= 0)
	    continue;
	off[i] = tn;
	tn += sets[i]->num;
    }

    printf ("#define LEAF0       (%d * sizeof (FcLangCharSet))\n", nsets);
    printf ("#define OFF0        (LEAF0 + %d * sizeof (FcCharLeaf))\n", tl);
Alan Coopersmith's avatar
Alan Coopersmith committed
381
    printf ("#define NUM0        (OFF0 + %d * sizeof (uintptr_t))\n", tn);
382
    printf ("#define SET(n)      (n * sizeof (FcLangCharSet) + offsetof (FcLangCharSet, charset))\n");
Alan Coopersmith's avatar
Alan Coopersmith committed
383
    printf ("#define OFF(s,o)    (OFF0 + o * sizeof (uintptr_t) - SET(s))\n");
384 385 386
    printf ("#define NUM(s,n)    (NUM0 + n * sizeof (FcChar16) - SET(s))\n");
    printf ("#define LEAF(o,l)   (LEAF0 + l * sizeof (FcCharLeaf) - (OFF0 + o * sizeof (intptr_t)))\n");
    printf ("#define fcLangCharSets (fcLangData.langCharSets)\n");
387
    printf ("#define fcLangCharSetIndices (fcLangData.langIndices)\n");
388
    printf ("#define fcLangCharSetIndicesInv (fcLangData.langIndicesInv)\n");
389 390 391 392 393
    printf ("\n");
    
    printf ("static const struct {\n"
	    "    FcLangCharSet  langCharSets[%d];\n"
	    "    FcCharLeaf     leaves[%d];\n"
Alan Coopersmith's avatar
Alan Coopersmith committed
394
	    "    uintptr_t      leaf_offsets[%d];\n"
395
	    "    FcChar16       numbers[%d];\n"
396
	    "    FcChar%s       langIndices[%d];\n"
397
	    "    FcChar%s       langIndicesInv[%d];\n"
398
	    "} fcLangData = {\n",
399
	    nsets, tl, tn, tn,
400
	    nsets < 256 ? "8 " : "16", nsets, nsets < 256 ? "8 " : "16", nsets);
401
	
402
    /*
403
     * Dump sets
404
     */
405 406

    printf ("{\n");
407 408
    for (i = 0; sets[i]; i++)
    {
409 410 411 412 413
	int	j = duplicate[i];

	if (j < 0)
	    j = i;

414
	printf ("    { \"%s\", "
415 416 417
		" { FC_REF_CONSTANT, %d, OFF(%d,%d), NUM(%d,%d) } }, /* %d */\n",
		langs[i],
		sets[j]->num, i, off[j], i, off[j], i);
418
    }
419
    printf ("},\n");
420
    
421
    /*
422
     * Dump leaves
423
     */
424 425
    printf ("{\n");
    for (l = 0; l < tl; l++)
426
    {
427 428
	printf ("    { { /* %d */", l);
	for (i = 0; i < 256/32; i++)
429
	{
430 431 432
	    if (i % 4 == 0)
		printf ("\n   ");
	    printf (" 0x%08x,", leaves[l]->map[i]);
433
	}
434
	printf ("\n    } },\n");
435
    }
436
    printf ("},\n");
437

438 439 440 441
    /*
     * Dump leaves
     */
    printf ("{\n");
442 443 444 445 446 447
    for (i = 0; sets[i]; i++)
    {
	int n;
	
	if (duplicate[i] >= 0)
	    continue;
448
	printf ("    /* %s */\n", names[i]);
449 450
	for (n = 0; n < sets[i]->num; n++)
	{
451
	    if (n % 4 == 0)
452 453
		printf ("   ");
	    for (l = 0; l < tl; l++)
454
		if (leaves[l] == FcCharSetLeaf(sets[i], n))
455 456 457
		    break;
	    if (l == tl)
		fatal (names[i], 0, "can't find leaf");
458 459
	    printf (" LEAF(%3d,%3d),", off[i], l);
	    if (n % 4 == 3)
460 461
		printf ("\n");
	}
462
	if (n % 4 != 0)
463
	    printf ("\n");
464
    }
465 466
    printf ("},\n");
	
467

468
    printf ("{\n");
469 470 471
    for (i = 0; sets[i]; i++)
    {
	int n;
472
	
473 474
	if (duplicate[i] >= 0)
	    continue;
475
	printf ("    /* %s */\n", names[i]);
476 477 478 479
	for (n = 0; n < sets[i]->num; n++)
	{
	    if (n % 8 == 0)
		printf ("   ");
480
	    printf (" 0x%04x,", FcCharSetNumbers (sets[i])[n]);
481 482 483 484 485 486
	    if (n % 8 == 7)
		printf ("\n");
	}
	if (n % 8 != 0)
	    printf ("\n");
    }
487 488
    printf ("},\n");

489
    /* langIndices */
490 491 492 493 494
    printf ("{\n");
    for (i = 0; sets[i]; i++)
    {
	printf ("    %d, /* %s */\n", entries[i].id, names[i]);
    }
495 496 497 498 499 500 501 502 503 504 505
    printf ("},\n");

    /* langIndicesInv */
    printf ("{\n");
    {
	static int		entries_inv[MAX_LANG];
	for (i = 0; sets[i]; i++)
	  entries_inv[entries[i].id] = i;
	for (i = 0; sets[i]; i++)
	    printf ("    %d, /* %s */\n", entries_inv[i], names[entries_inv[i]]);
    }
506
    printf ("}\n");
507

508
    printf ("};\n\n");
509

510 511 512 513 514 515 516 517
    printf ("#define NUM_LANG_CHAR_SET	%d\n", i);
    num_lang_set_map = (i + 31) / 32;
    printf ("#define NUM_LANG_SET_MAP	%d\n", num_lang_set_map);
    /*
     * Dump indices with country codes
     */
    if (ncountry)
    {
518
	int	c;
519 520 521 522 523 524 525 526
	int	ncountry_ent = 0;
	printf ("\n");
	printf ("static const FcChar32 fcLangCountrySets[][NUM_LANG_SET_MAP] = {\n");
	for (c = 0; c < ncountry; c++)
	{
	    i = country[c];
	    if (i >= 0)
	    {
527
		int lang = strchr (langs[i], '-') - langs[i];
528 529 530 531 532 533 534 535 536
		int d, k;

		for (k = 0; k < num_lang_set_map; k++)
		    map[k] = 0;

		BitSet (map, i);
		for (d = c + 1; d < ncountry; d++)
		{
		    int j = country[d];
537
		    if (j >= 0 && !strncmp (langs[j], langs[i], lang + 1))
538 539 540 541 542 543 544 545 546
		    {
			BitSet(map, j);
			country[d] = -1;
		    }
		}
		printf ("    {");
		for (k = 0; k < num_lang_set_map; k++)
		    printf (" 0x%08x,", map[k]);
		printf (" }, /* %*.*s */\n",
547
			lang, lang, langs[i]);
548 549 550 551 552 553 554
		++ncountry_ent;
	    }
	}
	printf ("};\n\n");
	printf ("#define NUM_COUNTRY_SET %d\n", ncountry_ent);
    }
    
555

556 557 558 559 560 561 562 563 564 565 566 567 568
    /*
     * Find ranges for each letter for faster searching
     */
    setRangeChar = 'a';
    memset(setRangeStart, '\0', sizeof (setRangeStart));
    memset(setRangeEnd, '\0', sizeof (setRangeEnd));
    for (i = 0; sets[i]; i++)
    {
	char	c = names[i][0];
	
	while (setRangeChar <= c && c <= 'z')
	    setRangeStart[setRangeChar++ - 'a'] = i;
    }
569 570 571
    while (setRangeChar <= 'z') /* no language code starts with these letters */
	setRangeStart[setRangeChar++ - 'a'] = i;

572 573 574 575
    for (setRangeChar = 'a'; setRangeChar < 'z'; setRangeChar++)
	setRangeEnd[setRangeChar - 'a'] = setRangeStart[setRangeChar+1-'a'] - 1;
    setRangeEnd[setRangeChar - 'a'] = i - 1;
    
576 577 578
    /*
     * Dump sets start/finish for the fastpath
     */
579
    printf ("\n");
580
    printf ("static const FcLangCharSetRange  fcLangCharSetRanges[] = {\n");
581
	printf ("\n");
582 583 584 585 586 587 588 589
    for (setRangeChar = 'a'; setRangeChar <= 'z' ; setRangeChar++)
    {
	printf ("    { %d, %d }, /* %c */\n",
		setRangeStart[setRangeChar - 'a'],
		setRangeEnd[setRangeChar - 'a'], setRangeChar);
    }
    printf ("};\n\n");
 
590 591 592 593 594 595
    while (fgets (line, sizeof (line), stdin))
	fputs (line, stdout);
    
    fflush (stdout);
    exit (ferror (stdout));
}