/*
 * $RCSId: xc/lib/fontconfig/src/fclang.c,v 1.7 2002/08/26 23:34:31 keithp Exp $
 *
 * Copyright © 2002 Keith Packard
 *
 * Permission to use, copy, modify, distribute, and sell this software and its
 * documentation for any purpose is hereby granted without fee, provided that
 * the above copyright notice appear in all copies and that both that
 * copyright notice and this permission notice appear in supporting
 * documentation, and that the name of Keith Packard not be used in
 * advertising or publicity pertaining to distribution of the software without
 * specific, written prior permission.  Keith Packard makes no
 * representations about the suitability of this software for any purpose.  It
 * is provided "as is" without express or implied warranty.
 *
 * KEITH PACKARD DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
 * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
 * EVENT SHALL KEITH PACKARD BE LIABLE FOR ANY SPECIAL, INDIRECT OR
 * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
 * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
 * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
 * PERFORMANCE OF THIS SOFTWARE.
 */

#include "fcint.h"

typedef struct {
28
    const FcChar8    	lang[8];
29
    const FcCharSet	charset;
30 31
} FcLangCharSet;

32 33 34 35 36
typedef struct {
    int begin;
    int end;
} FcLangCharSetRange;

37
#include "../fc-lang/fclang.h"
38

39 40
struct _FcLangSet {
    FcChar32	map[NUM_LANG_SET_MAP];
41
    FcStrSet	*extra;
42 43 44 45 46 47 48
};

/* Set bit `id` in ls->map: word index is id >> 5, bit index is id & 0x1f. */
#define FcLangSetBitSet(ls, id)	((ls)->map[(id)>>5] |= ((FcChar32) 1 << ((id) & 0x1f)))
/* Evaluate to 1 when bit `id` of ls->map is set, 0 otherwise. */
#define FcLangSetBitGet(ls, id) (((ls)->map[(id)>>5] >> ((id) & 0x1f)) & 1)

FcLangSet *
FcFreeTypeLangSet (const FcCharSet  *charset, 
49
		   const FcChar8    *exclusiveLang)
50
{
51
    int		    i, j;
52 53
    FcChar32	    missing;
    const FcCharSet *exclusiveCharset = 0;
54
    FcLangSet	    *ls;
55

56
    if (exclusiveLang)
57
	exclusiveCharset = FcLangGetCharSet (exclusiveLang);
58 59 60
    ls = FcLangSetCreate ();
    if (!ls)
	return 0;
61 62 63 64 65 66
    if (FcDebug() & FC_DBG_LANGSET) 
    {
	printf ("font charset\n");
	FcCharSetPrint (charset);
	printf ("\n");
    }
67 68
    for (i = 0; i < NUM_LANG_CHAR_SET; i++)
    {
69 70 71 72 73 74 75
	if (FcDebug() & FC_DBG_LANGSET) 
	{
	    printf ("%s charset\n", fcLangCharSets[i].lang);
	    FcCharSetPrint (&fcLangCharSets[i].charset);
	    printf ("\n");
	}
	
76 77 78 79 80 81
	/*
	 * Check for Han charsets to make fonts
	 * which advertise support for a single language
	 * not support other Han languages
	 */
	if (exclusiveCharset &&
82
	    FcFreeTypeIsExclusiveLang (fcLangCharSets[i].lang))
83
	{
84 85 86 87
	    if (fcLangCharSets[i].charset.num != exclusiveCharset->num)
		continue;

	    for (j = 0; j < fcLangCharSets[i].charset.num; j++)
88 89
		if (FcCharSetLeaf(&fcLangCharSets[i].charset, j) != 
		    FcCharSetLeaf(exclusiveCharset, j))
90
		    continue;
91
	}
92 93
	missing = FcCharSetSubtractCount (&fcLangCharSets[i].charset, charset);
        if (FcDebug() & FC_DBG_SCANV)
94 95 96 97 98 99 100 101 102
	{
	    if (missing && missing < 10)
	    {
		FcCharSet   *missed = FcCharSetSubtract (&fcLangCharSets[i].charset, 
							 charset);
		FcChar32    ucs4;
		FcChar32    map[FC_CHARSET_MAP_SIZE];
		FcChar32    next;

103
		printf ("\n%s(%u) ", fcLangCharSets[i].lang, missing);
104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121
		printf ("{");
		for (ucs4 = FcCharSetFirstPage (missed, map, &next);
		     ucs4 != FC_CHARSET_DONE;
		     ucs4 = FcCharSetNextPage (missed, map, &next))
		{
		    int	    i, j;
		    for (i = 0; i < FC_CHARSET_MAP_SIZE; i++)
			if (map[i])
			{
			    for (j = 0; j < 32; j++)
				if (map[i] & (1 << j))
				    printf (" %04x", ucs4 + i * 32 + j);
			}
		}
		printf (" }\n\t");
		FcCharSetDestroy (missed);
	    }
	    else
122
		printf ("%s(%u) ", fcLangCharSets[i].lang, missing);
123
	}
124
	if (!missing)
125
	    FcLangSetBitSet (ls, i);
126
    }
127

128 129
    if (FcDebug() & FC_DBG_SCANV)
	printf ("\n");
130 131 132
    
    
    return ls;
133 134
}

135
/* True when `c` ends the language part of a tag: a '-' separator or the NUL. */
#define FcLangEnd(c)	((c) == '-' || (c) == '\0')
136 137 138 139 140

FcLangResult
FcLangCompare (const FcChar8 *s1, const FcChar8 *s2)
{
    FcChar8	    c1, c2;
141 142
    FcLangResult    result = FcLangDifferentLang;

143 144 145 146
    for (;;)
    {
	c1 = *s1++;
	c2 = *s2++;
147
	
148 149 150
	c1 = FcToLower (c1);
	c2 = FcToLower (c2);
	if (c1 != c2)
151 152
	{
	    if (FcLangEnd (c1) && FcLangEnd (c2))
153
		result = FcLangDifferentTerritory;
154 155 156 157 158
	    return result;
	}
	else if (!c1)
	    return FcLangEqual;
	else if (c1 == '-')
159
	    result = FcLangDifferentTerritory;
160 161 162
    }
}

163
/*
164
 * Return FcTrue when super contains sub. 
165
 *
166 167 168
 * super contains sub if super and sub have the same
 * language and either the same country or one
 * is missing the country
169 170 171
 */

static FcBool
172
FcLangContains (const FcChar8 *super, const FcChar8 *sub)
173 174 175 176 177
{
    FcChar8	    c1, c2;

    for (;;)
    {
178 179
	c1 = *super++;
	c2 = *sub++;
180 181 182 183 184
	
	c1 = FcToLower (c1);
	c2 = FcToLower (c2);
	if (c1 != c2)
	{
185
	    /* see if super has a country while sub is mising one */
186 187
	    if (c1 == '-' && c2 == '\0')
		return FcTrue;
188 189 190
	    /* see if sub has a country while super is mising one */
	    if (c1 == '\0' && c2 == '-')
		return FcTrue;
191 192 193 194 195 196 197
	    return FcFalse;
	}
	else if (!c1)
	    return FcTrue;
    }
}

198
const FcCharSet *
199
FcLangGetCharSet (const FcChar8 *lang)
200 201 202
{
    int		i;
    int		country = -1;
203

204 205 206 207 208
    for (i = 0; i < NUM_LANG_CHAR_SET; i++)
    {
	switch (FcLangCompare (lang, fcLangCharSets[i].lang)) {
	case FcLangEqual:
	    return &fcLangCharSets[i].charset;
209
	case FcLangDifferentTerritory:
210 211 212 213 214 215 216 217
	    if (country == -1)
		country = i;
	default:
	    break;
	}
    }
    if (country == -1)
	return 0;
218
    return &fcLangCharSets[country].charset;
219
}
220

221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236
/*
 * Return a string set holding the tag of every fcLangCharSets entry.
 *
 * Returns 0 if the set cannot be created or if adding any tag fails; in
 * the latter case the partially filled set is destroyed rather than
 * handed back incomplete.
 */
FcStrSet *
FcGetLangs (void)
{
    FcStrSet *langs;
    int	i;

    langs = FcStrSetCreate();
    if (!langs)
	return 0;

    for (i = 0; i < NUM_LANG_CHAR_SET; i++)
	if (!FcStrSetAdd (langs, fcLangCharSets[i].lang))
	{
	    FcStrSetDestroy (langs);
	    return 0;
	}

    return langs;
}

237 238 239 240 241 242 243 244 245 246
FcLangSet *
FcLangSetCreate (void)
{
    FcLangSet	*ls;

    ls = malloc (sizeof (FcLangSet));
    if (!ls)
	return 0;
    FcMemAlloc (FC_MEM_LANGSET, sizeof (FcLangSet));
    memset (ls->map, '\0', sizeof (ls->map));
247
    ls->extra = 0;
248 249 250 251 252 253
    return ls;
}

/*
 * Release a language set: destroy its `extra' string set when present,
 * report the release through FcMemFree, then free the structure itself.
 * `ls' is dereferenced unconditionally and so must not be null.
 */
void
FcLangSetDestroy (FcLangSet *ls)
{
    FcStrSet	*strings = ls->extra;

    if (strings)
    {
	FcStrSetDestroy (strings);
    }
    FcMemFree (FC_MEM_LANGSET, sizeof (FcLangSet));
    free (ls);
}

FcLangSet *
FcLangSetCopy (const FcLangSet *ls)
{
    FcLangSet	*new;

    new = FcLangSetCreate ();
    if (!new)
	goto bail0;
    memcpy (new->map, ls->map, sizeof (new->map));
269
    if (ls->extra)
270 271 272 273
    {
	FcStrList	*list;
	FcChar8		*extra;
	
274 275
	new->extra = FcStrSetCreate ();
	if (!new->extra)
276 277
	    goto bail1;

278
	list = FcStrListCreate (ls->extra);	
279 280 281 282
	if (!list)
	    goto bail1;
	
	while ((extra = FcStrListNext (list)))
283
	    if (!FcStrSetAdd (new->extra, extra))
284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299
	    {
		FcStrListDone (list);
		goto bail1;
	    }
	FcStrListDone (list);
    }
    return new;
bail1:
    FcLangSetDestroy (new);
bail0:
    return 0;
}

static int
FcLangSetIndex (const FcChar8 *lang)
{
300 301
    int	    low, high, mid = 0;
    int	    cmp = 0;
302
    FcChar8 firstChar = FcToLower(lang[0]); 
303
    FcChar8 secondChar = firstChar ? FcToLower(lang[1]) : '\0';
304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322
    
    if (firstChar < 'a')
    {
	low = 0;
	high = fcLangCharSetRanges[0].begin;
    }
    else if(firstChar > 'z')
    {
	low = fcLangCharSetRanges[25].begin;
	high = NUM_LANG_CHAR_SET - 1;
    }
    else
    {
	low = fcLangCharSetRanges[firstChar - 'a'].begin;
	high = fcLangCharSetRanges[firstChar - 'a'].end;
	/* no matches */
	if (low > high)
	    return -low; /* next entry after where it would be */
    }
323 324 325 326

    while (low <= high)
    {
	mid = (high + low) >> 1;
327 328 329 330 331
	if(fcLangCharSets[mid].lang[0] != firstChar)
	    cmp = FcStrCmpIgnoreCase(fcLangCharSets[mid].lang, lang);
	else
	{   /* fast path for resolving 2-letter languages (by far the most common) after
	     * finding the first char (probably already true because of the hash table) */
332 333 334 335
	    cmp = fcLangCharSets[mid].lang[1] - secondChar;
	    if (cmp == 0 && 
		(fcLangCharSets[mid].lang[2] != '\0' || 
		 lang[2] != '\0'))
336
	    {
337 338
		cmp = FcStrCmpIgnoreCase(fcLangCharSets[mid].lang+2, 
					 lang+2);
339 340 341
	    }
	}
	if (cmp == 0)
342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363
	    return mid;
	if (cmp < 0)
	    low = mid + 1;
	else
	    high = mid - 1;
    }
    if (cmp < 0)
	mid++;
    return -(mid + 1);
}

/*
 * Add `lang' to the set.  A tag found in fcLangCharSets sets its bit in
 * the map; any other tag is added to the `extra' string set, which is
 * created on first use.
 *
 * Returns FcFalse when the string set cannot be created, otherwise the
 * result of FcStrSetAdd (or FcTrue for a table tag).
 */
FcBool
FcLangSetAdd (FcLangSet *ls, const FcChar8 *lang)
{
    int	    idx = FcLangSetIndex (lang);

    if (idx < 0)
    {
	if (!ls->extra)
	{
	    ls->extra = FcStrSetCreate ();
	    if (!ls->extra)
		return FcFalse;
	}
	return FcStrSetAdd (ls->extra, lang);
    }
    FcLangSetBitSet (ls, idx);
    return FcTrue;
}

FcLangResult
FcLangSetHasLang (const FcLangSet *ls, const FcChar8 *lang)
{
    int		    id;
    FcLangResult    best, r;
    int		    i;

    id = FcLangSetIndex (lang);
381 382 383
    if (id < 0)
	id = -id - 1;
    else if (FcLangSetBitGet (ls, id))
384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401
	return FcLangEqual;
    best = FcLangDifferentLang;
    for (i = id - 1; i >= 0; i--)
    {
	r = FcLangCompare (lang, fcLangCharSets[i].lang);
	if (r == FcLangDifferentLang)
	    break;
	if (FcLangSetBitGet (ls, i) && r < best)
	    best = r;
    }
    for (i = id; i < NUM_LANG_CHAR_SET; i++)
    {
	r = FcLangCompare (lang, fcLangCharSets[i].lang);
	if (r == FcLangDifferentLang)
	    break;
	if (FcLangSetBitGet (ls, i) && r < best)
	    best = r;
    }
402
    if (ls->extra)
403
    {
404
	FcStrList	*list = FcStrListCreate (ls->extra);
405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443
	FcChar8		*extra;
	
	if (list)
	{
	    while (best > FcLangEqual && (extra = FcStrListNext (list)))
	    {
		r = FcLangCompare (lang, extra);
		if (r < best)
		    best = r;
	    }
	    FcStrListDone (list);
	}
    }
    return best;
}

/*
 * Return the smallest FcLangSetHasLang result obtained for any string of
 * `set' against `ls'.  Yields FcLangDifferentLang when the string list
 * cannot be created or no string matches more closely.
 */
static FcLangResult
FcLangSetCompareStrSet (const FcLangSet *ls, FcStrSet *set)
{
    FcLangResult    closest = FcLangDifferentLang;
    FcLangResult    cur;
    FcStrList	    *iter = FcStrListCreate (set);
    FcChar8	    *tag;

    if (!iter)
	return closest;

    /* stop as soon as an exact match has been found */
    while (closest > FcLangEqual && (tag = FcStrListNext (iter)))
    {
	cur = FcLangSetHasLang (ls, tag);
	if (cur < closest)
	    closest = cur;
    }
    FcStrListDone (iter);
    return closest;
}

FcLangResult
FcLangSetCompare (const FcLangSet *lsa, const FcLangSet *lsb)
{
444
    int		    i, j;
445 446 447 448 449 450
    FcLangResult    best, r;

    for (i = 0; i < NUM_LANG_SET_MAP; i++)
	if (lsa->map[i] & lsb->map[i])
	    return FcLangEqual;
    best = FcLangDifferentLang;
451 452 453 454 455
    for (j = 0; j < NUM_COUNTRY_SET; j++)
	for (i = 0; i < NUM_LANG_SET_MAP; i++)
	    if ((lsa->map[i] & fcLangCountrySets[j][i]) &&
		(lsb->map[i] & fcLangCountrySets[j][i]))
	    {
456
		best = FcLangDifferentTerritory;
457 458
		break;
	    }
459
    if (lsa->extra)
460
    {
461
	r = FcLangSetCompareStrSet (lsb, lsa->extra);
462 463 464
	if (r < best)
	    best = r;
    }
465
    if (best > FcLangEqual && lsb->extra)
466
    {
467
	r = FcLangSetCompareStrSet (lsa, lsb->extra);
468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485
	if (r < best)
	    best = r;
    }
    return best;
}

/*
 * Used in computing values -- mustn't allocate any storage
 */
FcLangSet *
FcLangSetPromote (const FcChar8 *lang)
{
    static FcLangSet	ls;
    static FcStrSet	strs;
    static FcChar8	*str;
    int			id;

    memset (ls.map, '\0', sizeof (ls.map));
486
    ls.extra = 0;
487 488 489 490 491 492 493
    id = FcLangSetIndex (lang);
    if (id > 0)
    {
	FcLangSetBitSet (&ls, id);
    }
    else
    {
494
	ls.extra = &strs;
495 496
	strs.num = 1;
	strs.size = 1;
497
	strs.strs = &str;
498
	strs.ref = 1;
499 500 501 502 503 504 505 506 507 508 509 510 511
	str = (FcChar8 *) lang;
    }
    return &ls;
}

FcChar32
FcLangSetHash (const FcLangSet *ls)
{
    FcChar32	h = 0;
    int		i;

    for (i = 0; i < NUM_LANG_SET_MAP; i++)
	h ^= ls->map[i];
512 513
    if (ls->extra)
	h ^= ls->extra->num;
514 515 516 517 518 519
    return h;
}

FcLangSet *
FcNameParseLangSet (const FcChar8 *string)
{
520 521
    FcChar8	    lang[32],c;
    int i;
522 523 524 525 526 527
    FcLangSet	    *ls;

    ls = FcLangSetCreate ();
    if (!ls)
	goto bail0;

528
    for(;;)
529
    {
530
	for(i = 0; i < 31;i++)
531
	{
532 533 534 535
	    c = *string++;
	    if(c == '\0' || c == '|')
		break; /* end of this code */
	    lang[i] = c;
536
	}
537 538 539 540 541
	lang[i] = '\0';
	if (!FcLangSetAdd (ls, lang))
	    goto bail1;
	if(c == '\0')
	    break;
542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573
    }
    return ls;
bail1:
    FcLangSetDestroy (ls);
bail0:
    return 0;
}

/*
 * Append the textual form of a language set to `buf': the tags of all set
 * map bits in index order, followed by the `extra' strings, separated
 * by '|'.
 *
 * Returns FcFalse if the string list cannot be created or any append to
 * the buffer fails, FcTrue otherwise.
 */
FcBool
FcNameUnparseLangSet (FcStrBuf *buf, const FcLangSet *ls)
{
    int		i, bit;
    FcChar32	bits;
    FcBool	first = FcTrue;

    for (i = 0; i < NUM_LANG_SET_MAP; i++)
    {
	if ((bits = ls->map[i]))
	{
	    for (bit = 0; bit <= 31; bit++)
		/* unsigned shift: 1 << 31 on a signed int is undefined */
		if (bits & ((FcChar32) 1 << bit))
		{
		    int id = (i << 5) | bit;
		    if (!first)
			if (!FcStrBufChar (buf, '|'))
			    return FcFalse;
		    if (!FcStrBufString (buf, fcLangCharSets[id].lang))
			return FcFalse;
		    first = FcFalse;
		}
	}
    }
    if (ls->extra)
    {
	FcStrList   *list = FcStrListCreate (ls->extra);
	FcChar8	    *extra;
	FcBool	    ok = FcTrue;

	if (!list)
	    return FcFalse;
	/* single exit so the list is released exactly once on every path */
	while (ok && (extra = FcStrListNext (list)))
	{
	    if (!first && !FcStrBufChar (buf, '|'))
		ok = FcFalse;
	    else if (!FcStrBufString (buf, extra))
		ok = FcFalse;
	    first = FcFalse;
	}
	FcStrListDone (list);
	if (!ok)
	    return FcFalse;
    }
    return FcTrue;
}

FcBool
FcLangSetEqual (const FcLangSet *lsa, const FcLangSet *lsb)
{
    int	    i;

    for (i = 0; i < NUM_LANG_SET_MAP; i++)
    {
	if (lsa->map[i] != lsb->map[i])
	    return FcFalse;
    }
611
    if (!lsa->extra && !lsb->extra)
612
	return FcTrue;
613 614
    if (lsa->extra && lsb->extra)
	return FcStrSetEqual (lsa->extra, lsb->extra);
615 616
    return FcFalse;
}
617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647

static FcBool
FcLangSetContainsLang (const FcLangSet *ls, const FcChar8 *lang)
{
    int		    id;
    int		    i;

    id = FcLangSetIndex (lang);
    if (id < 0)
	id = -id - 1;
    else if (FcLangSetBitGet (ls, id))
	return FcTrue;
    /*
     * search up and down among equal languages for a match
     */
    for (i = id - 1; i >= 0; i--)
    {
	if (FcLangCompare (fcLangCharSets[i].lang, lang) == FcLangDifferentLang)
	    break;
	if (FcLangSetBitGet (ls, i) &&
	    FcLangContains (fcLangCharSets[i].lang, lang))
	    return FcTrue;
    }
    for (i = id; i < NUM_LANG_CHAR_SET; i++)
    {
	if (FcLangCompare (fcLangCharSets[i].lang, lang) == FcLangDifferentLang)
	    break;
	if (FcLangSetBitGet (ls, i) &&
	    FcLangContains (fcLangCharSets[i].lang, lang))
	    return FcTrue;
    }
648
    if (ls->extra)
649
    {
650
	FcStrList	*list = FcStrListCreate (ls->extra);
651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703
	FcChar8		*extra;
	
	if (list)
	{
	    while ((extra = FcStrListNext (list)))
	    {
		if (FcLangContains (extra, lang))
		    break;
	    }
	    FcStrListDone (list);
    	    if (extra)
		return FcTrue;
	}
    }
    return FcFalse;
}

/*
 * return FcTrue if lsa contains every language in lsb
 */
FcBool
FcLangSetContains (const FcLangSet *lsa, const FcLangSet *lsb)
{
    int		    i, j;
    FcChar32	    missing;

    if (FcDebug() & FC_DBG_MATCHV)
    {
	printf ("FcLangSet "); FcLangSetPrint (lsa);
	printf (" contains "); FcLangSetPrint (lsb);
	printf ("\n");
    }
    /*
     * check bitmaps for missing language support
     */
    for (i = 0; i < NUM_LANG_SET_MAP; i++)
    {
	missing = lsb->map[i] & ~lsa->map[i];
	if (missing)
	{
	    for (j = 0; j < 32; j++)
		if (missing & (1 << j)) 
		{
		    if (!FcLangSetContainsLang (lsa,
						fcLangCharSets[i*32 + j].lang))
		    {
			if (FcDebug() & FC_DBG_MATCHV)
			    printf ("\tMissing bitmap %s\n", fcLangCharSets[i*32+j].lang);
			return FcFalse;
		    }
		}
	}
    }
704
    if (lsb->extra)
705
    {
706
	FcStrList   *list = FcStrListCreate (lsb->extra);
707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726
	FcChar8	    *extra;

	if (list)
	{
	    while ((extra = FcStrListNext (list)))
	    {
		if (!FcLangSetContainsLang (lsa, extra))
		{
		    if (FcDebug() & FC_DBG_MATCHV)
			printf ("\tMissing string %s\n", extra);
		    break;
		}
	    }
	    FcStrListDone (list);
	    if (extra)
		return FcFalse;
	}
    }
    return FcTrue;
}
727

728 729
/*
 * Ask the serializer for sizeof (FcLangSet) bytes on behalf of `l'.
 * Returns FcTrue when FcSerializeAlloc succeeds, FcFalse otherwise.
 */
FcBool
FcLangSetSerializeAlloc (FcSerialize *serialize, const FcLangSet *l)
{
    return FcSerializeAlloc (serialize, l, sizeof (FcLangSet)) ? FcTrue : FcFalse;
}
735

736
FcLangSet *
737
FcLangSetSerialize(FcSerialize *serialize, const FcLangSet *l)
738
{
739
    FcLangSet	*l_serialize = FcSerializePtr (serialize, l);
740

741 742 743 744
    if (!l_serialize)
	return NULL;
    *l_serialize = *l;
    return l_serialize;
745
}
746 747 748
#define __fclang__
#include "fcaliastail.h"
#undef __fclang__