update-mime-database.c 82 KB
Newer Older
1
2
3
4
5
6
#include <config.h>

#define N_(x) x
#define _(x) (x)

#include <string.h>
7
#include <ctype.h>
8
9
10
11
#include <stdlib.h>
#include <unistd.h>
#include <stdio.h>
#include <glib.h>
12
#include <glib/gprintf.h>
13
#include <errno.h>
14
15
16
17
18
#include <dirent.h>
#include <libxml/parser.h>
#include <libxml/tree.h>
#include <sys/stat.h>
#include <sys/types.h>
19
#include <fcntl.h>
20

21
/* Namespace URIs.  FREE_NS is the shared-mime-info namespace and is
 * already cast to xmlChar * so it can be passed straight to libxml.
 */
#define XML_NS XML_XML_NAMESPACE
#define XMLNS_NS "http://www.w3.org/2000/xmlns/"
#define FREE_NS (xmlChar *)"http://www.freedesktop.org/standards/shared-mime-info"

/* Copyright / warranty notice, marked for translation with N_() */
#define COPYING								\
	     N_("Copyright (C) 2003 Thomas Leonard.\n"			\
		"update-mime-database comes with ABSOLUTELY NO WARRANTY,\n" \
		"to the extent permitted by law.\n"			\
		"You may redistribute copies of update-mime-database\n"	\
		"under the terms of the GNU General Public License.\n"	\
		"For more information about these matters, "		\
		"see the file named COPYING.\n")

/* Error domain used for every GError raised while parsing the sources */
#define MIME_ERROR g_quark_from_static_string("mime-error-quark")

/* Placeholder pattern / match data recorded for <glob-deleteall> and
 * <magic-deleteall> elements.
 */
#define NOGLOBS "__NOGLOBS__"
#define NOMAGIC "__NOMAGIC__"

/* May be predefined by the build; otherwise ";" on _WIN32, ":" elsewhere */
#ifndef PATH_SEPARATOR
# ifdef _WIN32
#  define PATH_SEPARATOR ";"
# else
#  define PATH_SEPARATOR ":"
# endif
#endif

47
48
49
/* Known top-level media types.
 *
 * This is the list of directories to scan when finding old type files to
 * delete. It is also used to warn about invalid MIME types: get_type()
 * emits a warning for any type whose media part is not listed here.
 */
const char *media_types[] = {
	"text",
	"application",
	"image",
	"audio",
	"inode",
	"video",
	"message",
	"model",
	"multipart",
	"x-content",
	"x-epoc",
	"x-scheme-handler",
};

65
/* Forward declarations of the record types; the structs follow below. */

/* Represents a MIME type */
typedef struct _Type Type;

/* A parsed <magic> element */
typedef struct _Magic Magic;

/* A parsed <match> element */
typedef struct _Match Match;

/* A parsed <treemagic> element */
typedef struct _TreeMagic TreeMagic;

/* A parsed <treematch> element */
typedef struct _TreeMatch TreeMatch;

/* A parsed <glob> element */
typedef struct _Glob Glob;

83
84
85
86
struct _Type {
	char *media;
	char *subtype;

87
	/* Contains xmlNodes for elements that are being copied to the output.
Thomas Leonard's avatar
Thomas Leonard committed
88
89
	 * That is, <comment>, <sub-class-of> and <alias> nodes, and anything
	 * with an unknown namespace.
90
	 */
91
	xmlDoc	*output;
92
};
Thomas Leonard's avatar
Thomas Leonard committed
93

Bastien Nocera's avatar
Bastien Nocera committed
94
95
96
97
struct _Glob {
	int weight;
	char *pattern;
	Type *type;
98
	gboolean noglob;
99
	gboolean case_sensitive;
Bastien Nocera's avatar
Bastien Nocera committed
100
101
};

102
103
104
105
struct _Magic {
	int priority;
	Type *type;
	GList *matches;
106
	gboolean nomagic;
107
108
109
110
111
112
113
114
115
116
117
118
};

/* One parsed <match> rule (see the Match typedef).
 * In the code visible here only 'data' and 'data_length' are assigned
 * (for the <magic-deleteall> placeholder); the other fields are presumably
 * filled in by the <match> parser -- TODO confirm against that code.
 */
struct _Match {
	long range_start;
	int range_length;
	char word_size;
	int data_length;	/* number of bytes in 'data' */
	char *data;
	char *mask;
	GList *matches;		/* NOTE(review): looks like nested child matches -- confirm */
};

119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
/* One parsed <treemagic> element, as created by tree_magic_new(). */
struct _TreeMagic {
	int priority;	/* cmp_tree_magic() sorts higher priorities first */
	Type *type;	/* type this rule belongs to */
	GList *matches;	/* presumably a list of TreeMatch records -- TODO confirm */
};

/* One parsed <treematch> element (see the TreeMatch typedef).
 * NOTE(review): none of these fields are read or written in the code
 * visible here; their meaning should be confirmed against the
 * <treematch> parser before relying on the names alone.
 */
struct _TreeMatch {
	char *path;
	gboolean match_case;
	gboolean executable;
	gboolean non_empty;
	gint type;
	char *mimetype;

	GList *matches;
};

136
137
/* Maps MIME type names to Types */
static GHashTable *types = NULL;
138

Thomas Leonard's avatar
Thomas Leonard committed
139
140
141
/* Maps "namespaceURI localName" strings to Types */
static GHashTable *namespace_hash = NULL;

142
143
/* Maps glob patterns to Types */
static GHashTable *globs_hash = NULL;
Thomas Leonard's avatar
Thomas Leonard committed
144
145

/* 'magic' nodes */
146
147
static GPtrArray *magic_array = NULL;

148
149
150
/* 'treemagic' nodes */
static GPtrArray *tree_magic_array = NULL;

151
152
153
154
155
156
/* Maps MIME type names to superclass names */
static GHashTable *subclass_hash = NULL;

/* Maps aliases to Types */
static GHashTable *alias_hash = NULL;

Bastien Nocera's avatar
Bastien Nocera committed
157
158
159
160
161
162
/* Maps MIME type names to icon names */
static GHashTable *icon_hash = NULL;

/* Maps MIME type names to icon names */
static GHashTable *generic_icon_hash = NULL;

163
164
165
/* Lists enabled log levels */
static GLogLevelFlags enabled_log_levels = G_LOG_LEVEL_ERROR | G_LOG_LEVEL_CRITICAL | G_LOG_LEVEL_WARNING;

166
167
/* Static prototypes */
static Magic *magic_new(xmlNode *node, Type *type, GError **error);
168
static Match *match_new(void);
169

170
171
static TreeMagic *tree_magic_new(xmlNode *node, Type *type, GError **error);

172
173
174
175
176
177
178
179
180
181
/* GLib log handler: prints 'message' followed by a newline on stdout when
 * 'log_level' shares at least one bit with enabled_log_levels; otherwise
 * the message is dropped.  'log_domain' and 'unused_data' are ignored.
 */
static void g_log_handler (const gchar   *log_domain,
			   GLogLevelFlags log_level,
			   const gchar   *message,
			   gpointer       unused_data)
{
	if ((log_level & enabled_log_levels) == 0)
		return;

	g_printf("%s\n", message);
}

182
183
184
185
186
187
188
189
190
191
192
static void
fatal_gerror (GError *error) G_GNUC_NORETURN;

static void
fatal_gerror (GError *error)
{
	g_assert(error != NULL);
	g_printerr("%s\n", error->message);
	g_error_free(error);
	exit (EXIT_FAILURE);
}
193

194
195
/* Print the one-line usage summary to stderr.
 * 'name' is substituted as the program name.
 */
static void usage(const char *name)
{
	g_fprintf(stderr, _("Usage: %s [-hvV] MIME-DIR\n"), name);
}

/* Release a Type together with its name strings and output document.
 * Takes a gpointer so it can be used as a GDestroyNotify.  A NULL
 * argument is ignored.
 */
static void free_type(gpointer data)
{
	Type *type = (Type *) data;

	if (type == NULL)
		return;

	g_free(type->media);
	g_free(type->subtype);
	xmlFreeDoc(type->output);

	g_free(type);
}

211
212
213
214
215
216
217
218
219
220
/* Thin wrapper around xmlGetNsProp() that takes and returns plain char
 * strings, so callers don't trip gcc4 signedness warnings (xmlChar is a
 * typedef for unsigned char).  The result is whatever xmlGetNsProp()
 * returned, merely cast; callers in this file release it with xmlFree().
 */
static char *my_xmlGetNsProp (xmlNodePtr node,
			      const char *name,
			      const xmlChar *namespace)
{
	xmlChar *value = xmlGetNsProp (node, (const xmlChar *)name, namespace);

	return (char *)value;
}

221
222
223
224
225
/* If we've seen this type before, return the existing object.
 * Otherwise, create a new one. Checks that the name looks sensible;
 * if not, sets error and returns NULL.
 * Also warns about unknown media types, but does not set error.
 */
226
static Type *get_type(const char *name, GError **error)
227
{
228
229
	xmlNode *root;
	xmlNs *ns;
230
231
	const char *slash;
	Type *type;
232
	int i;
233
234
235
236

	slash = strchr(name, '/');
	if (!slash || strchr(slash + 1, '/'))
	{
237
238
		g_set_error(error, MIME_ERROR, 0,
				_("Invalid MIME-type '%s'"), name);
239
240
241
242
243
244
245
246
247
248
249
250
		return NULL;
	}

	type = g_hash_table_lookup(types, name);
	if (type)
		return type;

	type = g_new(Type, 1);
	type->media = g_strndup(name, slash - name);
	type->subtype = g_strdup(slash + 1);
	g_hash_table_insert(types, g_strdup(name), type);

251
252
	type->output = xmlNewDoc((xmlChar *)"1.0");
	root = xmlNewDocNode(type->output, NULL, (xmlChar *)"mime-type", NULL);
253
254
255
	ns = xmlNewNs(root, FREE_NS, NULL);
	xmlSetNs(root, ns);
	xmlDocSetRootElement(type->output, root);
256
	xmlSetNsProp(root, NULL, (xmlChar *)"type", (xmlChar *)name);
257
	xmlAddChild(root, xmlNewDocComment(type->output,
258
		(xmlChar *)"Created automatically by update-mime-database. DO NOT EDIT!"));
259

260
261
262
263
264
265
	for (i = 0; i < G_N_ELEMENTS(media_types); i++)
	{
		if (strcmp(media_types[i], type->media) == 0)
			return type;
	}

266
	g_warning("Unknown media type in type '%s'", name);
267

268
269
270
	return type;
}

271
/* Test that this node has the expected name and namespace */
272
273
274
275
276
277
static gboolean match_node(xmlNode *node,
			   const char *namespaceURI,
			   const char *localName)
{
	if (namespaceURI)
		return node->ns &&
278
279
			strcmp((char *)node->ns->href, namespaceURI) == 0 &&
  		        strcmp((char *)node->name, localName) == 0;
280
	else
281
		return strcmp((char *)node->name, localName) == 0 && !node->ns;
282
283
}

Bastien Nocera's avatar
Bastien Nocera committed
284
/* Read the un-namespaced attribute 'name' of 'node' as a decimal integer
 * in the range 0..100.
 *
 * Returns 50 when the attribute is absent, the parsed value when it is a
 * well-formed number within range, and -1 otherwise (empty string,
 * trailing garbage, overflow or out of range).
 */
static int get_int_attribute(xmlNode *node, const char *name)
{
	char *text;
	char *end;
	long value;
	gboolean valid;

	text = my_xmlGetNsProp(node, name, NULL);
	if (!text)
		return 50;

	/* Validate as long, before narrowing: assigning strtol()'s result
	 * straight to an int could wrap a huge value into the valid range.
	 */
	errno = 0;
	value = strtol(text, &end, 10);
	valid = *text != '\0' && *end == '\0' && errno != ERANGE &&
		value >= 0 && value <= 100;

	xmlFree(text);

	return valid ? (int) value : -1;
}

Bastien Nocera's avatar
Bastien Nocera committed
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
/* Look up the 'priority' attribute of a <magic> node.
 * Yields 50 when the attribute is missing and -1 when it is present but
 * not a valid priority.
 */
static int get_priority(xmlNode *node)
{
	const char *attribute = "priority";

	return get_int_attribute(node, attribute);
}

/* Look up the 'weight' attribute of a <glob> node.
 * Yields 50 when the attribute is missing and -1 when it is present but
 * not a valid weight.
 */
static int get_weight(xmlNode *node)
{
	const char *attribute = "weight";

	return get_int_attribute(node, attribute);
}

324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
/* Return the value of a false/true attribute, which defaults to false.
 * Returns TRUE only when the un-namespaced attribute 'name' exists on
 * 'node' and its value is exactly "true"; FALSE in every other case.
 */
static gboolean get_boolean_attribute(xmlNode *node, const char* name)
{
	gboolean result = FALSE;
	char *attr;

	attr = my_xmlGetNsProp(node, name, NULL);
	if (attr)
	{
		result = strcmp (attr, "true") == 0;
		/* Free on every path: an early return for "true" would leak it */
		xmlFree(attr);
	}

	return result;
}

342
/* Process a <root-XML> element by adding a rule to namespace_hash */
343
344
static void add_namespace(Type *type, const char *namespaceURI,
			  const char *localName, GError **error)
Thomas Leonard's avatar
Thomas Leonard committed
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
{
	g_return_if_fail(type != NULL);

	if (!namespaceURI)
	{
		g_set_error(error, MIME_ERROR, 0,
			_("Missing 'namespaceURI' attribute'"));
		return;
	}

	if (!localName)
	{
		g_set_error(error, MIME_ERROR, 0,
			_("Missing 'localName' attribute'"));
		return;
	}

	if (!*namespaceURI && !*localName)
	{
		g_set_error(error, MIME_ERROR, 0,
			_("namespaceURI and localName attributes can't "
			  "both be empty"));
		return;
	}

	if (strpbrk(namespaceURI, " \n") || strpbrk(localName, " \n"))
	{
		g_set_error(error, MIME_ERROR, 0,
			_("namespaceURI and localName cannot contain "
			  "spaces or newlines"));
		return;
	}

	g_hash_table_insert(namespace_hash,
			g_strconcat(namespaceURI, " ", localName, NULL),
			type);
}

383
384
/* 'field' was found in the definition of 'type' and has the freedesktop.org
 * namespace. If it's a known field, process it and return TRUE, else
385
 * return FALSE to add it to the output XML document.
Thomas Leonard's avatar
Thomas Leonard committed
386
 * On error, returns FALSE and sets 'error'.
387
 */
Thomas Leonard's avatar
Thomas Leonard committed
388
389
static gboolean process_freedesktop_node(Type *type, xmlNode *field,
					 GError **error)
390
{
391
392
393
	gboolean copy_to_xml;

	copy_to_xml = FALSE;
394
	if (strcmp((char *)field->name, "glob") == 0)
395
	{
Bastien Nocera's avatar
Bastien Nocera committed
396
397
		gchar *pattern;	
		gint weight;
398
		gboolean case_sensitive;
Bastien Nocera's avatar
Bastien Nocera committed
399
400

		weight = get_weight(field);
401
		case_sensitive = get_boolean_attribute(field, "case-sensitive");
Bastien Nocera's avatar
Bastien Nocera committed
402
403
404
405
406
407

		if (weight == -1)
		{
			g_set_error(error, MIME_ERROR, 0,
				    _("Bad weight (%d) in <glob> element"), weight);
		}
408
		pattern = my_xmlGetNsProp(field, "pattern", NULL);
409

Thomas Leonard's avatar
Thomas Leonard committed
410
		if (pattern && *pattern)
Thomas Leonard's avatar
Thomas Leonard committed
411
		{
Bastien Nocera's avatar
Bastien Nocera committed
412
			Glob *glob;
413
414
			char *pat = case_sensitive ? g_strdup (pattern) : g_ascii_strdown (pattern, -1);
			GList *list = g_hash_table_lookup (globs_hash, pat);
415
			
Bastien Nocera's avatar
Bastien Nocera committed
416
			glob = g_new0 (Glob, 1);
417
			glob->pattern = pat;
Bastien Nocera's avatar
Bastien Nocera committed
418
419
			glob->type = type;
			glob->weight = weight;
420
			glob->case_sensitive = case_sensitive;
Bastien Nocera's avatar
Bastien Nocera committed
421
422
			list = g_list_append (list, glob);
			g_hash_table_insert(globs_hash, g_strdup (glob->pattern), list);
Thomas Leonard's avatar
Thomas Leonard committed
423
			xmlFree(pattern);
424
			copy_to_xml = TRUE;
Thomas Leonard's avatar
Thomas Leonard committed
425
426
		}
		else
Thomas Leonard's avatar
Thomas Leonard committed
427
428
429
430
431
432
433
		{
			if (pattern)
				xmlFree(pattern);
			g_set_error(error, MIME_ERROR, 0,
				_("Missing 'pattern' attribute in <glob> "
				  "element"));
		}
434
	}
435
436
437
438
439
440
441
442
443
444
	else if (strcmp((char *)field->name, "glob-deleteall") == 0)
	{
		Glob *glob;
		GList *list = g_hash_table_lookup (globs_hash, NOGLOBS);

		glob = g_new0 (Glob, 1);
		glob->pattern = g_strdup (NOGLOBS);
		glob->type = type;
		glob->weight = 0;
		glob->noglob = TRUE;
445
		glob->case_sensitive = FALSE;
446
447
448
449
		list = g_list_append (list, glob);
		g_hash_table_insert(globs_hash, g_strdup (glob->pattern), list);
		copy_to_xml = TRUE;
	}
450
	else if (strcmp((char *)field->name, "magic") == 0)
451
	{
452
		Magic *magic;
Thomas Leonard's avatar
Thomas Leonard committed
453

454
		magic = magic_new(field, type, error);
Thomas Leonard's avatar
Thomas Leonard committed
455

456
457
458
459
		if (!*error)
		{
			g_return_val_if_fail(magic != NULL, FALSE);
			g_ptr_array_add(magic_array, magic);
Thomas Leonard's avatar
Thomas Leonard committed
460
461
		}
		else
462
			g_return_val_if_fail(magic == NULL, FALSE);
463
	}
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
	else if (strcmp((char *)field->name, "magic-deleteall") == 0)
	{
		Magic *magic;
		Match *match;

		magic = g_new0(Magic, 1);
		magic->priority = 0;
		magic->type = type;
		magic->nomagic = TRUE;
		match = match_new ();
		match->data = g_strdup (NOMAGIC);
		match->data_length = strlen (NOMAGIC);
		magic->matches = g_list_prepend (NULL, match);

		g_ptr_array_add(magic_array, magic);
	}
480
481
482
483
484
485
486
487
488
489
490
491
492
493
	else if (strcmp((char *)field->name, "treemagic") == 0)
	{
		TreeMagic *magic;

		magic = tree_magic_new(field, type, error);

		if (!*error)
		{
			g_return_val_if_fail(magic != NULL, FALSE);
			g_ptr_array_add(tree_magic_array, magic);
		}
		else
			g_return_val_if_fail(magic == NULL, FALSE);
	}
494
495
496
	else if (strcmp((char *)field->name, "comment") == 0 ||
		 strcmp((char *)field->name, "acronym") == 0 ||
		 strcmp((char *)field->name, "expanded-acronym") == 0)
497
		copy_to_xml = TRUE;
498
499
	else if (strcmp((char *)field->name, "alias") == 0 ||
		 strcmp((char *)field->name, "sub-class-of") == 0)
Thomas Leonard's avatar
Thomas Leonard committed
500
501
502
	{
		char *other_type;
		gboolean valid;
503
504
		GSList *list, *nlist;

505
		other_type = my_xmlGetNsProp(field, "type", NULL);
Thomas Leonard's avatar
Thomas Leonard committed
506
507
		valid = other_type && strchr(other_type, '/');
		if (valid)
508
509
510
511
512
513
514
		{
			char *typename;

			typename = g_strdup_printf("%s/%s", 
						   type->media,
						   type->subtype);
			
515
			if (strcmp((char *)field->name, "alias") == 0)
516
517
518
519
520
521
522
523
524
				g_hash_table_insert(alias_hash,
						    g_strdup(other_type), type);
				
			else
			{
				list = g_hash_table_lookup(subclass_hash, typename);
				nlist = g_slist_append (list, g_strdup(other_type));
				if (list == NULL)
					g_hash_table_insert(subclass_hash, 
525
							    g_strdup(typename), nlist);
526
527
528
			}
			g_free(typename);
			xmlFree(other_type);
529
530
531
532
533
534
535
536
537

			copy_to_xml = TRUE; /* Copy through */
		}
		else
		{
			xmlFree(other_type);
			g_set_error(error, MIME_ERROR, 0,
				    _("Incorrect or missing 'type' attribute "
				      "in <%s>"), field->name);
538
		}
Thomas Leonard's avatar
Thomas Leonard committed
539
	}
540
	else if (strcmp((char *)field->name, "root-XML") == 0)
Thomas Leonard's avatar
Thomas Leonard committed
541
542
543
	{
		char *namespaceURI, *localName;

544
545
		namespaceURI = my_xmlGetNsProp(field, "namespaceURI", NULL);
		localName = my_xmlGetNsProp(field, "localName", NULL);
Thomas Leonard's avatar
Thomas Leonard committed
546
547
548
549
550
551
552
553

		add_namespace(type, namespaceURI, localName, error);

		if (namespaceURI)
			xmlFree(namespaceURI);
		if (localName)
			xmlFree(localName);
	}
Bastien Nocera's avatar
Bastien Nocera committed
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
	else if (strcmp((char *)field->name, "generic-icon") == 0 ||
		 strcmp((char *)field->name, "icon") == 0) 
	{
		char *icon;
		char *typename;

		icon = my_xmlGetNsProp(field, "name", NULL);

		if (icon) 
		{
			typename = g_strdup_printf("%s/%s",
						   type->media,
						   type->subtype);

			if (strcmp((char *)field->name, "icon") == 0)
				g_hash_table_insert(icon_hash,
						    typename, g_strdup (icon));
			else
				g_hash_table_insert(generic_icon_hash,
						    typename, g_strdup (icon));

			xmlFree (icon);

577
			copy_to_xml = TRUE; /* Copy through */
Bastien Nocera's avatar
Bastien Nocera committed
578
579
580
		}
	}

581
582
583
	if (*error)
		return FALSE;
	return !copy_to_xml;
584
585
}

586
587
588
/* Checks to see if 'node' has the given value for xml:lang.
 * If 'lang' is NULL, checks that 'node' doesn't have an xml:lang.
 */
589
590
591
592
static gboolean has_lang(xmlNode *node, const char *lang)
{
	char *lang2;

593
	lang2 = my_xmlGetNsProp(node, "lang", XML_NS);
594
595
596
	if (!lang2)
		return !lang;

597
	if (lang && strcmp(lang, lang2) == 0)
598
	{
Thomas Leonard's avatar
Thomas Leonard committed
599
		xmlFree(lang2);
600
601
		return TRUE;
	}
602
	xmlFree(lang2);
603
604
605
606
607
608
609
610
	return FALSE;
}

/* We're about to add 'new' to the list of fields to be output for the
 * type. Remove any existing nodes which it replaces.
 *
 * Only freedesktop.org <comment> elements are replaced: the first
 * existing <comment> with the same xml:lang as 'new' (or without one, if
 * 'new' has none) is unlinked and freed.  Everything else is left alone.
 */
static void remove_old(Type *type, xmlNode *new)
{
	xmlNode *field, *fields;
	char *lang;

	if (new->ns == NULL || xmlStrcmp(new->ns->href, FREE_NS) != 0)
		return;	/* No idea what we're doing -- leave it in! */

	if (strcmp((char *)new->name, "comment") != 0)
		return;

	lang = my_xmlGetNsProp(new, "lang", XML_NS);

	fields = xmlDocGetRootElement(type->output);
	for (field = fields->xmlChildrenNode; field; field = field->next)
	{
		if (match_node(field, (char *)FREE_NS, "comment") &&
		    has_lang(field, lang))
		{
			/* 'field' is dead after this, so leave the loop at once */
			xmlUnlinkNode(field);
			xmlFreeNode(field);
			break;
		}
	}

	xmlFree(lang);
}

637
638
639
/* 'node' is a <mime-type> node from a source file, whose type is 'type'.
 * Process all the child elements, setting 'error' if anything goes wrong.
 */
640
static void load_type(Type *type, xmlNode *node, GError **error)
641
642
643
{
	xmlNode *field;

644
645
646
	g_return_if_fail(type != NULL);
	g_return_if_fail(node != NULL);
	g_return_if_fail(error != NULL);
647
648
649
650
651
652
653
654

	for (field = node->xmlChildrenNode; field; field = field->next)
	{
		xmlNode *copy;

		if (field->type != XML_ELEMENT_NODE)
			continue;

655
		if (field->ns && xmlStrcmp(field->ns->href, FREE_NS) == 0)
Thomas Leonard's avatar
Thomas Leonard committed
656
		{
657
658
659
660
661
			if (process_freedesktop_node(type, field, error))
			{
				g_return_if_fail(*error == NULL);
				continue;
			}
Thomas Leonard's avatar
Thomas Leonard committed
662
663
		}

664
665
666
		if (*error)
			return;

667
668
669
670
		copy = xmlDocCopyNode(field, type->output, 1);
		
		/* Ugly hack to stop the xmlns= attributes appearing on
		 * every node...
671
		 */
672
		if (copy->ns && copy->ns->prefix == NULL &&
673
			xmlStrcmp(copy->ns->href, FREE_NS) == 0)
674
675
676
677
678
679
680
681
682
		{
			if (copy->nsDef)
			{
				/* Still used somewhere... */
				/* xmlFreeNsList(copy->nsDef); */
				/* (this leaks) */
				copy->nsDef = NULL;
			}
		}
683

684
685
		remove_old(type, field);

686
		xmlAddChild(xmlDocGetRootElement(type->output), copy);
687
688
689
	}
}

690
691
692
693
/* Parse 'filename' as an XML file and add all the information to the
 * database. If called more than once, information read in later calls
 * overrides information read previously.
 */
694
695
696
697
698
699
700
701
static void load_source_file(const char *filename)
{
	xmlDoc *doc;
	xmlNode *root, *node;

	doc = xmlParseFile(filename);
	if (!doc)
	{
702
		g_warning(_("Failed to parse '%s'"), filename);
703
704
705
706
707
		return;
	}

	root = xmlDocGetRootElement(doc);

708
	if (root->ns == NULL || xmlStrcmp(root->ns->href, FREE_NS) != 0)
709
	{
710
		g_warning("Wrong namespace on document element in '%s' (should be %s)", filename, FREE_NS);
711
712
		goto out;
	}
713

714
	if (strcmp((char *)root->name, "mime-info") != 0)
715
	{
716
		g_warning("Root element <%s> is not <mime-info> (in '%s')", root->name, filename);
717
718
		goto out;
	}
719
720
721

	for (node = root->xmlChildrenNode; node; node = node->next)
	{
722
		Type *type = NULL;
723
		char *type_name = NULL;
724
725
		GError *error = NULL;

726
727
728
		if (node->type != XML_ELEMENT_NODE)
			continue;

729
		if (!match_node(node, (char *)FREE_NS, "mime-type"))
730
731
732
733
734
735
			g_set_error(&error, MIME_ERROR, 0,
				_("Excepted <mime-type>, but got wrong name "
				  "or namespace"));

		if (!error)
		{
736
			type_name = my_xmlGetNsProp(node, "type", NULL);
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756

			if (!type_name)
				g_set_error(&error, MIME_ERROR, 0,
					_("<mime-type> element has no 'type' "
					  "attribute"));
		}

		if (type_name)
		{
			type = get_type(type_name, &error);
			xmlFree(type_name);
		}

		if (!error)
		{
			g_return_if_fail(type != NULL);
			load_type(type, node, &error);
		}
		else
			g_return_if_fail(type == NULL);
757

758
759
		if (error)
		{
760
			g_warning("Error in type '%s/%s' (in %s): %s.",
761
762
763
				  type ? type->media : _("unknown"),
				  type ? type->subtype : _("unknown"),
				  filename, error->message);
764
765
			g_error_free(error);
		}
766
767
768
769
770
	}
out:
	xmlFreeDoc(doc);
}

771
/* Used as the sort function for sorting GPtrArrays */
Thomas Leonard's avatar
Thomas Leonard committed
772
773
774
775
776
777
778
779
/* Comparator for arrays of C strings: 'a' and 'b' each point at an array
 * slot holding a char *, and the two strings are ordered with strcmp().
 */
static gint strcmp2(gconstpointer a, gconstpointer b)
{
	char *const *left = (char **) a;
	char *const *right = (char **) b;

	return strcmp(*left, *right);
}

780
781
782
/* 'path' should be a 'packages' directory. Loads the information from
 * every file in the directory.
 */
783
784
785
786
787
static void scan_source_dir(const char *path)
{
	DIR *dir;
	struct dirent *ent;
	char *filename;
Thomas Leonard's avatar
Thomas Leonard committed
788
789
790
	GPtrArray *files;
	int i;
	gboolean have_override = FALSE;
791
792
793
794
795
796
797
798

	dir = opendir(path);
	if (!dir)
	{
		perror("scan_source_dir");
		exit(EXIT_FAILURE);
	}

Thomas Leonard's avatar
Thomas Leonard committed
799
	files = g_ptr_array_new();
800
801
802
803
804
805
	while ((ent = readdir(dir)))
	{
		int l;
		l = strlen(ent->d_name);
		if (l < 4 || strcmp(ent->d_name + l - 4, ".xml") != 0)
			continue;
Thomas Leonard's avatar
Thomas Leonard committed
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
		if (strcmp(ent->d_name, "Override.xml") == 0)
		{
			have_override = TRUE;
			continue;
		}
		g_ptr_array_add(files, g_strdup(ent->d_name));
	}
	closedir(dir);

	g_ptr_array_sort(files, strcmp2);

	if (have_override)
		g_ptr_array_add(files, g_strdup("Override.xml"));

	for (i = 0; i < files->len; i++)
	{
		gchar *leaf = (gchar *) files->pdata[i];

		filename = g_strconcat(path, "/", leaf, NULL);
825
826
827
828
		load_source_file(filename);
		g_free(filename);
	}

Thomas Leonard's avatar
Thomas Leonard committed
829
830
831
	for (i = 0; i < files->len; i++)
		g_free(files->pdata[i]);
	g_ptr_array_free(files, TRUE);
832
833
}

834
/* Write 'doc' to 'filename'.
 * Returns TRUE on success; on failure returns FALSE with 'error' set.
 * With libxml newer than 2.4.0 the document is saved formatted and
 * UTF-8 encoded; older versions fall back to a plain dump through the
 * fopen_gerror()/fclose_gerror() helpers.
 */
static gboolean save_xml_file(xmlDocPtr doc, const gchar *filename, GError **error)
{
#if LIBXML_VERSION > 20400
	if (xmlSaveFormatFileEnc(filename, doc, "utf-8", 1) < 0)
	{
		g_set_error(error, G_FILE_ERROR, G_FILE_ERROR_FAILED,
			    "Failed to write XML file; For permission problems, try rerunning as root");
		return FALSE;
	}
#else
	FILE *out;

	out = fopen_gerror(filename, error);
	if (!out)
		return FALSE;

	xmlDocDump(out, doc);  /* Some versions return void */

	if (!fclose_gerror(out, error))
		return FALSE;
#endif

	return TRUE;
}

859
/* Write out globs for one pattern to the 'globs' file */
Bastien Nocera's avatar
Bastien Nocera committed
860
static void write_out_glob(GList *globs, FILE *stream)
861
{
Bastien Nocera's avatar
Bastien Nocera committed
862
863
	GList *list;
	Glob *glob;
864
	Type *type;
865

Bastien Nocera's avatar
Bastien Nocera committed
866
867
868
869
	for (list = globs; list; list = list->next) {
		glob = (Glob *)list->data;
		type = glob->type;
		if (strchr(glob->pattern, '\n'))
870
871
			g_warning("Glob patterns can't contain literal newlines "
				  "(%s in type %s/%s)", glob->pattern,
872
				  type->media, type->subtype);
873
		else
874
			g_fprintf(stream, "%s/%s:%s\n",
Bastien Nocera's avatar
Bastien Nocera committed
875
				type->media, type->subtype, glob->pattern);
876
	}
877
878
}

Bastien Nocera's avatar
Bastien Nocera committed
879
880
881
882
883
884
/* Write out globs and weights for one pattern to the 'globs2' file.
 * Each Glob in 'globs' becomes a "weight:media/subtype:pattern" line on
 * 'stream'.  Case-sensitive globs are additionally written, first, with
 * a trailing ":cs" flag field.  Patterns containing a newline are
 * skipped with a warning.
 */
static void write_out_glob2(GList *globs, FILE *stream)
{
	GList *list;

	for (list = globs ; list; list = list->next) {
		Glob *glob = (Glob *)list->data;
		Type *type = glob->type;

		if (strchr(glob->pattern, '\n'))
		{
			g_warning("Glob patterns can't contain literal newlines "
				  "(%s in type %s/%s)", glob->pattern,
				  type->media, type->subtype);
			continue;
		}

		/* The only flag so far is ":cs" */
		if (glob->case_sensitive)
			g_fprintf(stream, "%d:%s/%s:%s%s\n",
				  glob->weight, type->media, type->subtype,
				  glob->pattern, ":cs");

		/* Always write the line without the flags, for older parsers */
		g_fprintf(stream, "%d:%s/%s:%s\n",
			  glob->weight, type->media, type->subtype, glob->pattern);
	}
}

/* GHFunc-style callback: appends a shallow copy of the GList in 'value'
 * to the list that 'data' (a GList **) points at.  'key' is unused.
 */
static void collect_glob2(gpointer key, gpointer value, gpointer data)
{
	GList **collected = data;
	GList *copy = g_list_copy ((GList *)value);

	*collected = g_list_concat (*collected, copy);
}

920
/* Ordering for Glob records: entries with 'noglob' set sort first;
 * otherwise heavier weights sort before lighter ones.
 */
static int compare_glob_by_weight (gpointer a, gpointer b)
{
	Glob *ag = (Glob *)a;
	Glob *bg = (Glob *)b;

	if (ag->noglob || bg->noglob)
		return bg->noglob - ag->noglob;

	return bg->weight - ag->weight;
}

931
932
933
934
935
936
937
938
/* Fill 'error' with a G_FILE_ERROR describing the current errno.
 * errno is captured first so the GLib calls below cannot disturb it.
 */
static void
set_error_from_errno (GError **error)
{
	const int saved_errno = errno;
	const gint code = g_file_error_from_errno(saved_errno);
	const gchar *text = g_strerror(saved_errno);

	g_set_error_literal(error, G_FILE_ERROR, code, text);
}

939
/* Renames pathname by removing the .new extension */
940
static gboolean atomic_update(const gchar *pathname, GError **error)
941
{
942
943
	gboolean ret = FALSE;
	gchar *new_name = NULL;
944
	int len;
945
	int fd;
946
947
948

	len = strlen(pathname);

949
	g_return_val_if_fail(strcmp(pathname + len - 4, ".new") == 0, FALSE);
950
951
952

	new_name = g_strndup(pathname, len - 4);

953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
#ifdef HAVE_FDATASYNC
	fd = open(pathname, O_RDONLY);
	if (fd == -1)
	{
		set_error_from_errno(error);
		goto out;
	}
	if (fdatasync(fd) == -1)
	{
		set_error_from_errno(error);
		goto out;
	}
	if (close(fd) == -1)
	{
		set_error_from_errno(error);
		goto out;
	}
#endif

972
973
974
975
#ifdef _WIN32
	/* we need to remove the old file first! */
	remove(new_name);
#endif
976
977
978
979
980
981
982
983
	if (rename(pathname, new_name) == -1)
	{
		int errsv = errno;
		g_set_error(error, G_FILE_ERROR, g_file_error_from_errno(errsv),
			    "Failed to rename %s as %s: %s", pathname, new_name,
			    g_strerror(errsv));
		goto out;
	}
984

985
986
	ret = TRUE;
out:
987
	g_free(new_name);
988
	return ret;
989
990
}

991
/* Write out an XML file for one type */
992
993
994
995
static void write_out_type(gpointer key, gpointer value, gpointer data)
{
	Type *type = (Type *) value;
	const char *mime_dir = (char *) data;
996
	char *media, *filename;
997
	GError *local_error = NULL;
998
999

	media = g_strconcat(mime_dir, "/", type->media, NULL);
1000
1001
1002
#ifdef _WIN32
	mkdir(media);
#else
1003
	mkdir(media, 0755);
1004
#endif
1005

1006
	filename = g_strconcat(media, "/", type->subtype, ".xml.new", NULL);
1007
1008
	g_free(media);
	media = NULL;
1009

1010
1011
	if (!save_xml_file(type->output, filename, &local_error))
		fatal_gerror(local_error);
1012

1013
1014
	if (!atomic_update(filename, &local_error))
		fatal_gerror(local_error);
1015

1016
1017
1018
	g_free(filename);
}

1019
/* Comparison function to get the magic rules in priority order */
1020
static gint cmp_magic(gconstpointer a, gconstpointer b)
Thomas Leonard's avatar
Thomas Leonard committed
1021
{
1022
1023
	Magic *aa = *(Magic **) a;
	Magic *bb = *(Magic **) b;
1024
	int retval;
Thomas Leonard's avatar
Thomas Leonard committed
1025

1026
1027
1028
1029
	/* Sort nomagic items at start */
	if (aa->nomagic || bb->nomagic)
		return bb->nomagic - aa->nomagic;

1030
	if (aa->priority > bb->priority)
Thomas Leonard's avatar
Thomas Leonard committed
1031
		return -1;
1032
	else if (aa->priority < bb->priority)
1033
		return 1;
1034

1035
1036
1037
	retval = strcmp(aa->type->media, bb->type->media);
	if (!retval)
		retval = strcmp(aa->type->subtype, bb->type->subtype);
1038
1039

	return retval;
Thomas Leonard's avatar
Thomas Leonard committed
1040
1041
}

1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
/* Orders TreeMagic rules for sorting: higher priority first, ties broken
 * by media and then subtype in strcmp() order.  'a' and 'b' point at
 * array slots holding TreeMagic pointers.
 */
static gint cmp_tree_magic(gconstpointer a, gconstpointer b)
{
	TreeMagic *lhs = *(TreeMagic **) a;
	TreeMagic *rhs = *(TreeMagic **) b;
	int by_media;

	if (lhs->priority != rhs->priority)
		return lhs->priority > rhs->priority ? -1 : 1;

	by_media = strcmp(lhs->type->media, rhs->type->media);
	if (by_media != 0)
		return by_media;

	return strcmp(lhs->type->subtype, rhs->type->subtype);
}

1061
/* Write out 'n' as a two-byte big-endian number to 'stream' */
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
static void write16(FILE *stream, guint32 n)
{
	guint16 big = GUINT16_TO_BE(n);

	g_return_if_fail(n <= 0xffff);

	fwrite(&big, sizeof(big), 1, stream);
}

/* Single hex char to int; -1 if not a hex char.
 * From file(1).
 *
 * 'c' is compared as-is against the ranges 0-9, a-f and A-F, so any
 * value outside those ranges (negative values and values above 0x7f
 * included) yields -1. No <ctype.h> classification is used: the result
 * does not depend on the locale or on the non-ISO isascii().
 */
static int hextoint(int c)
{
	if (c >= '0' && c <= '9')
		return c - '0';
	if (c >= 'a' && c <= 'f')
		return c - 'a' + 10;
	if (c >= 'A' && c <= 'F')
		return c - 'A' + 10;
	return -1;
}

/*
 * Expand the C character escapes in the NUL-terminated string 's' and
 * append the resulting bytes to 'out'.
 *
 * Recognised after a backslash:
 *   n r b t f v     the usual control characters
 *   0-7             an octal number of up to three digits
 *   x               a hex number of up to two digits; with no hex digit
 *                   following, a literal 'x' is produced
 *   anything else   that character itself
 * A backslash that is the last character of 's' is dropped.
 * Characters outside an escape are copied unchanged.
 * Stolen from file(1) and heavily modified.
 */
static void getstr(const char *s, GString *out)
{
	for (;;) {
		int ch = *s++;
		int byte;

		if (ch == '\0')
			return;

		if (ch != '\\') {
			g_string_append_c(out, (char) ch);
			continue;
		}

		/* Escape sequence: examine the character after the backslash */
		ch = *s++;

		if (ch == '\0')
			return;

		if (ch >= '0' && ch <= '7') {
			/* \ and up to 3 octal digits */
			int extra;

			byte = ch - '0';
			for (extra = 0;
			     extra < 2 && *s >= '0' && *s <= '7';
			     extra++)
				byte = (byte << 3) | (*s++ - '0');
		} else if (ch == 'x') {
			/* \x and up to 2 hex digits */
			int digit = hextoint(*s);

			if (digit < 0) {
				byte = 'x';	/* Default if no digits */
			} else {
				byte = digit;
				s++;
				digit = hextoint(*s);
				if (digit >= 0) {
					byte = (byte << 4) + digit;
					s++;
				}
			}
		} else {
			switch (ch) {
			case 'n':
				byte = '\n';
				break;
			case 'r':
				byte = '\r';
				break;
			case 'b':
				byte = '\b';
				break;
			case 't':
				byte = '\t';
				break;
			case 'f':
				byte = '\f';
				break;
			case 'v':
				byte = '\v';
				break;
			default:
				/* Unknown escape: keep the character itself */
				byte = ch;
				break;
			}
		}

		g_string_append_c(out, (char) byte);
	}
}

1179
1180
1181
/* Parse the value and mask attributes of a <match> element with a
 * numerical type (anything except "string").
 */
1182
1183
static void parse_int_value(int bytes, const char *in, const char *in_mask,
			    GString *parsed_value, char **parsed_mask,
1184
			    gboolean big_endian, GError **error)
1185
1186
{
	char *end;
1187
	char *out_mask = NULL;
1188
	unsigned long value;
1189
	int b;
1190

1191
1192
1193
1194
1195
1196
1197
1198
	value = strtoul(in, &end, 0);
	if (errno == ERANGE) {
		g_set_error(error, MIME_ERROR, 0,
			    "Number out-of-range (%s should fit in %d bytes)",
			    in, bytes);
		return;
	}

1199
1200
	if (*end != '\0')
	{
Thomas Leonard's avatar
Thomas Leonard committed
1201
		g_set_error(error, MIME_ERROR, 0, "Value is not a number");
1202
1203
		return;
	}
1204

1205
	for (b = 0; b < bytes; b++)
1206
	{
1207
		int shift = (big_endian ? (bytes - b - 1) : b) * 8;
1208
		g_string_append_c(parsed_value, (value >> shift) & 0xff);
1209
	}
1210

1211
1212
1213
1214
1215
1216
1217
1218
1219
	if ((bytes == 1 && (value & ~0xff)) ||
	    (bytes == 2 && (value & ~0xffff)))
	{
		g_set_error(error, MIME_ERROR, 0,
			    "Number out-of-range (%lx should fit in %d bytes)",
			    value, bytes);
		return;
	}

1220
	if (in_mask)
1221
	{
1222
		int b;
1223
		unsigned long mask;
1224
		
1225
1226
1227
1228
1229
1230
1231
1232
1233
		mask = strtoul(in_mask, &end, 0);
		if (errno == ERANGE) {
			g_set_error(error, MIME_ERROR, 0,
				    "Mask out-of-range (%s should fit in %d bytes)",
				    in_mask, bytes);
			return;
		}


1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
		if (*end != '\0')
		{
			g_set_error(error, MIME_ERROR, 0,
				    "Mask is not a number");
			return;
		}

		out_mask = g_new(char, bytes);
		for (b = 0; b < bytes; b++)
		{
1244
			int shift = (big_endian ? (bytes - b - 1) : b) * 8;
1245
1246
			out_mask[b] = (mask >> shift) & 0xff;
		}
1247
	}
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263

	*parsed_mask = out_mask;
}

/* 'len' is the length of the value. The mask created will be the same
 * length.
 */
static char *parse_string_mask(const char *mask, int len, GError **error)
{
	int i;
	char *parsed_mask = NULL;

	g_return_val_if_fail(mask != NULL, NULL);
	g_return_val_if_fail(len > 0, NULL);

	if (mask[0] != '0' || mask[1] != 'x')
1264
	{
1265
1266
1267
		g_set_error(error, MIME_ERROR, 0,
			"String masks must be in base 16 (starting with 0x)");
		goto err;
1268
	}
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302