Commit 1bac8224 authored by Bastien Nocera's avatar Bastien Nocera
Browse files

2008-07-18 Bastien Nocera <hadess@hadess.net>

	* .cvsignore: upd

	* shared-mime-info-spec.xml: Update the spec to contain
	"content-types" or "tree magic", to detect an "x-content/" mime-type
	from a directory structure, patch from Matthias Clasen
	<mclasen@redhat.com>

	* update-mime-database.c (process_freedesktop_node),
	(cmp_tree_magic), (tree_match_new), (tree_match_free),
	(build_tree_matches), (tree_magic_free), (tree_magic_new),
	(write_tree_magic_children), (write_tree_magic), (add_type),
	(write_types), (write_header), (write_types_cache), (write_cache),
	(main): Add support for writing the tree-magic file, as per the
	spec update above, patch from Matthias Clasen <mclasen@redhat.com>

	* freedesktop.org.xml.in: Add one single tree-magic item, for
	testing

	* configure.in: check for GIO, build the tree magic test if it's
	available
	* Makefile.am: use test-tree-magic if it's there
	* test-tree-magic.c: automated testing for tree magic, based on GIO
	code by Matthias Clasen <mclasen@redhat.com>
	* tests/tree-list: A few tests
parent d5b5d265
......@@ -20,3 +20,4 @@ stamp-h*
update-mime-database
shared-mime-info.pc
test-subclassing
test-tree-magic
2008-07-18 Bastien Nocera <hadess@hadess.net>
* .cvsignore: upd
* shared-mime-info-spec.xml: Update the spec to contain
"content-types" or "tree magic", to detect an "x-content/" mime-type
from a directory structure, patch from Matthias Clasen
<mclasen@redhat.com>
* update-mime-database.c (process_freedesktop_node),
(cmp_tree_magic), (tree_match_new), (tree_match_free),
(build_tree_matches), (tree_magic_free), (tree_magic_new),
(write_tree_magic_children), (write_tree_magic), (add_type),
(write_types), (write_header), (write_types_cache), (write_cache),
(main): Add support for writing the tree-magic file, as per the
spec update above, patch from Matthias Clasen <mclasen@redhat.com>
* freedesktop.org.xml.in: Add one single tree-magic item, for
testing
* configure.in: check for GIO, build the tree magic test if it's
available
* Makefile.am: use test-tree-magic if it's there
* test-tree-magic.c: automated testing for tree magic, based on GIO
code by Matthias Clasen <mclasen@redhat.com>
* tests/tree-list: A few tests
2008-06-22 Bastien Nocera <hadess@hadess.net>
* freedesktop.org.xml.in: Add support for the extended URL format
......
......@@ -15,11 +15,19 @@ test_subclassing_SOURCES = test-subclassing.c
test_subclassing_CFLAGS = $(ALL_CFLAGS)
test_subclassing_LDADD = $(ALL_LIBS)
if HAVE_GIO
noinst_PROGRAMS += test-tree-magic
test_tree_magic_SOURCES = test-tree-magic.c
test_tree_magic_CFLAGS = $(GIO_CFLAGS)
test_tree_magic_LDADD = $(GIO_LIBS)
endif
man_MANS = update-mime-database.1
EXTRA_DIST = \
shared-mime-info-spec.xml \
freedesktop.org.xml.in \
test-tree-magic.c \
$(packages_DATA) \
README \
HACKING \
......@@ -66,11 +74,12 @@ check:
@if test -n $(XMLLINT) ; then \
xmllint --noout --valid $(srcdir)/freedesktop.org.xml; \
fi
@if test -d CVS/ && test -x ../xdgmime/src/test-mime-data ; then \
if test -d CVS/ && test -x ../xdgmime/src/test-mime-data && test -x $(top_builddir)/test-tree-magic ; then \
mkdir -p $(top_builddir)/temp-mime-dir/mime/packages ; \
cp -a $(top_builddir)/freedesktop.org.xml $(top_builddir)/temp-mime-dir/mime/packages/ ; \
XDG_DATA_DIRS="$(top_builddir)/temp-mime-dir/" $(top_builddir)/update-mime-database "$(top_builddir)/temp-mime-dir/mime/" ; \
XDG_DATA_DIRS="$(top_builddir)/temp-mime-dir/" ../xdgmime/src/test-mime-data -v tests/list || (XDG_DATA_DIRS="$(top_builddir)/temp-mime-dir/" ../xdgmime/src/test-mime-data -v -v tests/list ; exit 1); \
XDG_DATA_DIRS="$(top_builddir)/temp-mime-dir/" $(top_builddir)/test-tree-magic tests/tree-list; \
rm -rf "$(top_builddir)/temp-mime-dir/" ; \
fi
@if `grep comment $(srcdir)/freedesktop.org.xml.in | grep -v _comment | grep -q -v '<!'` ; then \
......
......@@ -17,13 +17,18 @@ GETTEXT_PACKAGE=shared-mime-info
AC_SUBST(GETTEXT_PACKAGE)
AM_GLIB_GNU_GETTEXT
dnl Check pkg-config is installed
dnl Check whether libxml and glib are installed
PKG_CHECK_MODULES(ALL, \
libxml-2.0 >= 2.4 \
glib-2.0 >= 2.6.0)
AC_SUBST(ALL_CFLAGS)
AC_SUBST(ALL_LIBS)
dnl Build the tree magic test if gio is present
PKG_CHECK_MODULES(GIO, gio-2.0, HAVE_GIO=yes, AC_MSG_RESULT(no))
AM_CONDITIONAL([HAVE_GIO], test "x$HAVE_GIO" = "xyes")
dnl Use -Wall, etc if possible
[
if test "x$GCC" = "xyes"; then
......
......@@ -3,7 +3,7 @@
<!ELEMENT mime-info (mime-type)+>
<!ATTLIST mime-info xmlns CDATA #FIXED "http://www.freedesktop.org/standards/shared-mime-info">
<!ELEMENT mime-type (comment+, (acronym,expanded-acronym)? , (generic-icon? | glob | magic | root-XML | alias | sub-class-of)*)>
<!ELEMENT mime-type (comment+, (acronym,expanded-acronym)? , (generic-icon? | glob | magic | treemagic | root-XML | alias | sub-class-of)*)>
<!ATTLIST mime-type type CDATA #REQUIRED>
<!-- a comment describing a document with the respective MIME type. Example: "WMV video" -->
......@@ -36,6 +36,17 @@
<!ATTLIST match value CDATA #REQUIRED>
<!ATTLIST match mask CDATA #IMPLIED>
<!ELEMENT treemagic (treematch)+>
<!ATTLIST treemagic priority CDATA #IMPLIED>
<!ELEMENT treematch (treematch)*>
<!ATTLIST treematch path CDATA #REQUIRED>
<!ATTLIST treematch type (file|directory|link) #IMPLIED>
<!ATTLIST treematch match-case (true|false) #IMPLIED>
<!ATTLIST treematch executable (true|false) #IMPLIED>
<!ATTLIST treematch non-empty (true|false) #IMPLIED>
<!ATTLIST treematch mimetype CDATA #IMPLIED>
<!ELEMENT root-XML EMPTY>
<!ATTLIST root-XML
namespaceURI CDATA #REQUIRED
......@@ -4762,4 +4773,13 @@ command to generate the output files.
</magic>
<glob pattern="*.pcf"/>
</mime-type>
<!-- Tree content-types -->
<mime-type type="x-content/image-dcf">
<_comment>Digital Photos</_comment>
<treemagic priority="75">
<treematch path="dcim" type="directory" non-empty="true"/>
</treemagic>
</mime-type>
</mime-info>
<?xml version="1.0" standalone="no"?>
<!DOCTYPE article PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN"
"http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd" [
<!ENTITY updated "25 January 2008">
<!ENTITY version "0.17">
<!ENTITY updated "10 June 2008">
<!ENTITY version "0.18">
]>
<article id="index">
......@@ -343,6 +343,36 @@ of the document element.
If <userinput>localName</userinput> is present but empty then the document element may have
any name, but the namespace must still match.
</para></listitem>
<listitem><para>
<userinput>treemagic</userinput> elements contain a list of <userinput>treematch</userinput> elements,
any of which may match, and an optional <userinput>priority</userinput> attribute for all of the
contained rules. The default priority value is 50, and the maximum is 100.
</para><para>
Each <userinput>treematch</userinput> element has a number of attributes:
<informaltable>
<tgroup cols="3">
<thead><row><entry>Attribute</entry><entry>Required?</entry><entry>Value</entry></row></thead>
<tbody>
<row><entry>path</entry><entry>Yes</entry><entry>A path that must be present on the mounted volume/filesystem.</entry></row>
<row><entry>type</entry><entry>No</entry><entry>The type of path. Possible values: <userinput>file</userinput>, <userinput>directory</userinput>, <userinput>link</userinput></entry></row>
<row><entry>match-case</entry><entry>No</entry><entry>Whether path should be matched case-sensitively. Possible values: <userinput>true</userinput>, <userinput>false</userinput></entry></row>
<row><entry>executable</entry><entry>No</entry><entry>Whether the file must be executable. Possible values: <userinput>true</userinput>, <userinput>false</userinput></entry></row>
<row><entry>non-empty</entry><entry>No</entry><entry>Whether the directory must be non-empty. Possible values: <userinput>true</userinput>, <userinput>false</userinput></entry></row>
<row><entry>mimetype</entry><entry>No</entry><entry>The mimetype for the file at path</entry></row>
</tbody></tgroup>
</informaltable>
<userinput>treematch</userinput> elements can be nested, meaning that both the outer and the inner <userinput>treematch</userinput>
must be satisfied for a "match".
</para></listitem>
</itemizedlist>
Applications may also define their own elements, provided they are namespaced to prevent collisions.
Unknown elements are copied directly to the output XML files like <userinput>comment</userinput>
......@@ -573,6 +603,39 @@ application/msword:x-office-document
</screen>
</para>
</sect2>
<sect2>
<title>The treemagic files</title>
<para>
The tree magic data is stored in a file with a format that is very similar to the magic file format.
</para>
<para>
The file starts with the magic string "MIME-TreeMagic\0\n". There is no version number in the file.
Incompatible changes will be handled by creating both the current `treemagic' and a newer `treemagic2'
in the new format. Where possible, changes will be made in a compatible fashion.
</para>
<para>
The rest of the file is made up of a sequence of small sections. Each section is introduced by giving
the priority and type in brackets, followed by a newline character. Higher priority entries come
first. Example:
<screen>[50:x-content/image-dcf]\n</screen>
Each line in the section takes the form:
<screen>[ indent ] ">" "\"" path "\"" "=" type [ "," option ]* "\n"</screen>
<informaltable>
<tgroup cols="2">
<thead><row><entry>Part</entry><entry>Meaning</entry></row></thead>
<tbody>
<row><entry>indent</entry><entry>The nesting depth of the rule.</entry></row>
<row><entry>path</entry><entry>The path to match.</entry></row>
<row><entry>type</entry><entry>The required file type, one of "file", "directory", "link" or "any"</entry></row>
<row><entry>option</entry><entry>Flags corresponding to the optional attributes of <userinput>treematch</userinput> elements.
Possible values are "executable", "match-case", "non-empty", or a MIME type</entry></row>
</tbody>
</tgroup>
</informaltable>
</para><para>
</para>
</sect2>
<sect2>
<title>The mime.cache files</title>
<para>
......@@ -846,6 +909,22 @@ they differ, they are from different devices and the directory is a mount
point.
</para>
</sect2>
<sect2>
<title>Content types for volumes</title>
<para>
Traditional MIME types apply to individual files or bytestreams. It is often useful
to apply the same methodologies when classifying the content of mountable volumes or
filesystems. The x-content type has been introduced for this purpose. Typical examples
are x-content/audio-dvd, x-content/blank-cd or x-content/image-dcf.
</para>
<para>
Matching of content types works with <userinput>treemagic</userinput> elements, which
are analogous to the <userinput>magic</userinput> elements used for MIME type matching.
Instead of looking for byte sequences in files, <userinput>treemagic</userinput> elements
allow looking for files with certain names, permissions or MIME types in a directory
hierarchy.
</para>
</sect2>
<sect2>
<title>Security implications</title>
<para>
......
<
/*
* Copyright (C) 2008 Red Hat, Inc.
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General
* Public License along with this library; if not, write to the
* Free Software Foundation, Inc., 59 Temple Place, Suite 330,
* Boston, MA 02111-1307, USA.
*
* Author: Matthias Clasen <mclasen@redhat.com>
*/
#include <stdlib.h>
#include <string.h>
#include <glib.h>
#include <gio/gio.h>
/*
 * TreeMatchlet: one rule of a tree magic section, built by
 * parse_match_line() from a single match line of the treemagic file.
 * Instances are allocated zero-filled, so unset flags are 0 and unset
 * pointers are NULL.
 */
typedef struct
{
/* Text found between the double quotes of the match line. */
gchar *path;
/* Required file type; G_FILE_TYPE_UNKNOWN when the line does not say
 * "=file", "=directory" or "=link". */
GFileType type;
/* Set when the "match-case" option is present. */
guint match_case : 1;
/* Set when the "executable" option is present. */
guint executable : 1;
/* Set when the "non-empty" option is present. */
guint non_empty : 1;
/* Set when the "on-disc" option is present. */
guint on_disc : 1;
/* Any option that is not one of the flags above is stored here. */
gchar *mimetype;
/* Nested TreeMatchlet children, appended by insert_matchlet(). */
GList *matches;
} TreeMatchlet;
/*
 * TreeMatch: one "[priority:content-type]" section of the treemagic file,
 * built by parse_header().
 */
typedef struct
{
/* Text following the ':' in the section header. */
gchar *contenttype;
/* Number following the '[' in the section header (parsed with atoi). */
gint priority;
/* Top-level (depth 0) TreeMatchlet rules of this section. */
GList *matches;
} TreeMatch;
/* Every parsed TreeMatch; insert_match() keeps the list ordered with
 * cmp_match(), and tree_magic_shutdown() releases it. */
static GList *tree_matches = NULL;
/*
 * tree_matchlet_free:
 * @matchlet: the matchlet to destroy; must not be NULL
 *
 * Releases @matchlet, its path and mimetype strings and, recursively,
 * every nested child matchlet.
 */
static void
tree_matchlet_free (TreeMatchlet *matchlet)
{
  GList *l;

  /* Iterate explicitly rather than casting this one-argument function to
   * GFunc for g_list_foreach(): GFunc takes two arguments, and calling a
   * function through an incompatible pointer type is undefined behaviour. */
  for (l = matchlet->matches; l != NULL; l = l->next)
    tree_matchlet_free ((TreeMatchlet *) l->data);
  g_list_free (matchlet->matches);

  g_free (matchlet->path);
  g_free (matchlet->mimetype);
  g_slice_free (TreeMatchlet, matchlet);
}
/*
 * tree_match_free:
 * @match: the section to destroy; must not be NULL
 *
 * Releases @match, its content type string and all of its matchlets.
 */
static void
tree_match_free (TreeMatch *match)
{
  GList *l;

  /* Explicit loop instead of g_list_foreach() with a (GFunc) cast: the
   * callee takes a single argument, so invoking it as a two-argument
   * GFunc is undefined behaviour. */
  for (l = match->matches; l != NULL; l = l->next)
    tree_matchlet_free ((TreeMatchlet *) l->data);
  g_list_free (match->matches);

  g_free (match->contenttype);
  g_slice_free (TreeMatch, match);
}
static void
tree_magic_shutdown (void)
{
g_list_foreach (tree_matches, (GFunc)tree_match_free, NULL);
g_list_free (tree_matches);
tree_matches = NULL;
}
/*
 * parse_header:
 * @line: a section header of the form "[priority:content-type]"; the
 *        trailing ']' is overwritten in place when parsing succeeds
 *
 * Parses a treemagic section header.
 *
 * Returns: a newly allocated TreeMatch (release with tree_match_free()),
 *          or NULL when @line is empty, is not enclosed in brackets, or
 *          has no ':' separating the priority from the content type.
 */
static TreeMatch *
parse_header (gchar *line)
{
  size_t len;
  gchar *colon;
  TreeMatch *match;

  len = strlen (line);

  /* An empty line would otherwise make line[len - 1] read before the
   * start of the buffer. */
  if (len == 0 || line[0] != '[' || line[len - 1] != ']')
    return NULL;

  /* Without a ':' there is no content type; the original code went on to
   * dereference the NULL returned by strchr(). */
  colon = strchr (line, ':');
  if (colon == NULL)
    return NULL;

  /* Drop the closing bracket so the content type ends before it. */
  line[len - 1] = 0;

  match = g_slice_new0 (TreeMatch);
  match->priority = atoi (line + 1);
  match->contenttype = g_strdup (colon + 1);

  return match;
}
/*
 * parse_match_line:
 * @line:  a match line of the form  [indent] ">" "\"" path "\"" "=" type
 *         [ "," option ]*  ; it is modified in place (the closing quote
 *         is overwritten with a NUL)
 * @depth: (out) receives the indent, or 0 when the line starts with '>'
 *
 * Parses one match line of a treemagic section.
 *
 * Returns: a newly allocated TreeMatchlet (release with
 *          tree_matchlet_free()).
 */
static TreeMatchlet *
parse_match_line (gchar *line,
                  gint  *depth)
{
  gchar *s, *p;
  TreeMatchlet *matchlet;
  gchar **parts;
  gint i;

  matchlet = g_slice_new0 (TreeMatchlet);

  if (line[0] == '>') {
    *depth = 0;
    s = line;
  }
  else {
    *depth = atoi (line);
    s = strchr (line, '>');
  }

  /* NOTE(review): the line is assumed to contain '>' followed by a quoted
   * path; a line lacking '>' or the closing '"' is not detected here and
   * still crashes, as before. Rejecting it needs a NULL return that the
   * caller would also have to handle. */
  s += 2;                       /* skip the '>' and the opening quote */
  p = strchr (s, '"');
  *p = 0;
  matchlet->path = g_strdup (s);

  s = p + 1;
  parts = g_strsplit (s, ",", 0);

  /* g_strsplit() yields an empty vector for an empty string, i.e. when
   * nothing follows the closing quote. Treat that as "any type" instead
   * of handing NULL to strcmp(). */
  if (parts[0] == NULL)
    matchlet->type = G_FILE_TYPE_UNKNOWN;
  else if (strcmp (parts[0], "=file") == 0)
    matchlet->type = G_FILE_TYPE_REGULAR;
  else if (strcmp (parts[0], "=directory") == 0)
    matchlet->type = G_FILE_TYPE_DIRECTORY;
  else if (strcmp (parts[0], "=link") == 0)
    matchlet->type = G_FILE_TYPE_SYMBOLIC_LINK;
  else
    matchlet->type = G_FILE_TYPE_UNKNOWN;

  /* Only look at options when there is a first element; otherwise
   * parts[1] lies beyond the vector's NULL terminator. */
  for (i = 1; parts[0] != NULL && parts[i] != NULL; i++) {
    if (strcmp (parts[i], "executable") == 0)
      matchlet->executable = 1;
    else if (strcmp (parts[i], "match-case") == 0)
      matchlet->match_case = 1;
    else if (strcmp (parts[i], "non-empty") == 0)
      matchlet->non_empty = 1;
    else if (strcmp (parts[i], "on-disc") == 0)
      matchlet->on_disc = 1;
    else {
      /* The last unrecognised option wins, as before; free the previous
       * one so repeated options no longer leak. */
      g_free (matchlet->mimetype);
      matchlet->mimetype = g_strdup (parts[i]);
    }
  }

  g_strfreev (parts);

  return matchlet;
}
/*
 * cmp_match:
 * @a: a TreeMatch
 * @b: another TreeMatch
 *
 * GCompareFunc that orders sections by descending priority.
 *
 * Returns: a negative value if @a has the higher priority, a positive
 *          value if @b has, and 0 when both priorities are equal.
 */
static gint
cmp_match (gconstpointer a, gconstpointer b)
{
  const TreeMatch *first = (const TreeMatch *) a;
  const TreeMatch *second = (const TreeMatch *) b;

  /* Compare rather than subtract: priorities come from atoi() on file
   * contents, and the difference of two extreme ints can overflow. */
  return (second->priority > first->priority)
       - (second->priority < first->priority);
}
/*
 * insert_match:
 * @match: the section to register
 *
 * Adds @match to the global tree_matches list at the position chosen by
 * cmp_match().
 */
static void
insert_match (TreeMatch *match)
{
  GList *updated;

  updated = g_list_insert_sorted (tree_matches, match, cmp_match);
  tree_matches = updated;
}
/*
 * insert_matchlet:
 * @match:    the section being built
 * @matchlet: the rule to attach; ownership passes to this function
 * @depth:    nesting level of @matchlet (0 = directly under @match)
 *
 * Appends @matchlet as the last child of the most recently added rule at
 * level @depth - 1, or directly to @match when @depth is 0.
 *
 * If no parent exists at the requested level, or @depth is negative,
 * @matchlet is freed and a warning naming the requested depth is logged.
 */
static void
insert_matchlet (TreeMatch    *match,
                 TreeMatchlet *matchlet,
                 gint          depth)
{
  GList **children = &match->matches;
  gint level;

  if (depth < 0) {
    g_warning ("can't insert matchlet at depth %d", depth);
    tree_matchlet_free (matchlet);
    return;
  }

  /* Descend once per level, always following the last rule added. */
  for (level = depth; level > 0; level--) {
    GList *last = g_list_last (*children);

    if (last == NULL) {
      /* Report the depth that was asked for, not the loop counter. */
      g_warning ("can't insert matchlet at depth %d", depth);
      tree_matchlet_free (matchlet);
      return;
    }

    children = &((TreeMatchlet *) last->data)->matches;
  }

  *children = g_list_append (*children, matchlet);
}
/*
 * read_tree_magic_from_directory:
 * @prefix: a data directory; the file read is <prefix>/mime/treemagic
 *
 * Loads the treemagic file below @prefix, if it can be read, and registers
 * its sections in the global tree_matches list. A file that cannot be
 * read is skipped silently; a file without the expected header is skipped
 * with a warning. Parsing stops with a warning at the first malformed
 * section header, or at a match line that precedes any section header.
 */
static void
read_tree_magic_from_directory (const gchar *prefix)
{
  /* The file starts with "MIME-TreeMagic", a NUL byte and a newline. */
  const gchar *magic = "MIME-TreeMagic";
  const gsize header_len = strlen (magic) + 2;
  gchar *filename;
  gchar *text;
  gsize len;
  gint i;
  TreeMatch *match = NULL;
  TreeMatchlet *matchlet;
  gint depth;

  filename = g_build_filename (prefix, "mime", "treemagic", NULL);

  if (g_file_get_contents (filename, &text, &len, NULL)) {
    /* strcmp() stops at the NUL embedded in the header. The length check
     * keeps text + header_len inside the buffer for truncated files. */
    if (len >= header_len && strcmp (text, magic) == 0) {
      gchar **lines;

      lines = g_strsplit (text + header_len, "\n", 0);
      for (i = 0; lines[i] && lines[i][0]; i++) {
        if (lines[i][0] == '[') {
          match = parse_header (lines[i]);
          if (match == NULL) {
            /* Do not register or dereference a failed parse. */
            g_warning ("%s: malformed section header, skipping rest of file", filename);
            break;
          }
          insert_match (match);
        }
        else if (match != NULL) {
          matchlet = parse_match_line (lines[i], &depth);
          insert_matchlet (match, matchlet, depth);
        }
        else {
          /* A rule with no section to attach it to. */
          g_warning ("%s: match line before any section header, skipping rest of file", filename);
          break;
        }
      }
      g_strfreev (lines);
    }
    else
      g_warning ("%s: header not found, skipping\n", filename);

    g_free (text);
  }

  g_free (filename);
}
/*
 * Enumerator: iteration state for walking a directory tree in search of
 * entries matching a path, one path component per level. Created by
 * enumerator_new(), advanced by enumerator_next(), released by
 * enumerator_free().
 */
typedef struct
{
/* Private copy of the path being searched for. */
gchar *path;
/* Number of entries in components. */
gint depth;
/* When TRUE, component_match() also accepts case-insensitive matches. */
gboolean ignore_case;
/* path split on G_DIR_SEPARATOR_S; NULL-terminated. */
gchar **components;
/* Collation keys of the case-folded components; only allocated when
 * ignore_case is set, otherwise NULL. */
gchar **case_components;
/* Per-level directory enumerator (depth entries); NULL when not open. */
GFileEnumerator **enumerators;
/* Per-level directory that enumerators[i] is listing (depth entries). */
GFile **children;
} Enumerator;
/*
 * component_match:
 * @e:     the enumerator holding the path components
 * @depth: index of the component to compare against
 * @name:  a directory entry name
 *
 * Returns: TRUE if @name equals component @depth exactly, or, when the
 *          enumerator ignores case, if the collation key of its
 *          case-folded form equals the precomputed key for that component.
 */
static gboolean
component_match (Enumerator  *e,
                 gint         depth,
                 const gchar *name)
{
  gboolean matched = TRUE;

  if (strcmp (name, e->components[depth]) != 0) {
    if (e->ignore_case) {
      gchar *folded = g_utf8_casefold (name, -1);
      gchar *collated = g_utf8_collate_key (folded, -1);

      matched = (strcmp (collated, e->case_components[depth]) == 0);

      g_free (collated);
      g_free (folded);
    }
    else
      matched = FALSE;
  }

  return matched;
}
/*
 * next_match_recurse:
 * @e:     the enumerator state
 * @depth: level (index into e->components) to produce a match for
 *
 * Returns the next entry at level @depth whose name satisfies
 * component_match(), resuming from wherever the enumerator for that level
 * was left by the previous call.
 *
 * Returns: a GFile obtained from g_file_get_child(), or NULL when the
 *          level has no open enumerator and none could be opened.
 */
static GFile *
next_match_recurse (Enumerator *e,
gint depth)
{
GFile *file;
GFileInfo *info;
const gchar *name;
while (TRUE) {
/* No directory is currently open at this level. */
if (e->enumerators[depth] == NULL) {
if (depth > 0) {
/* Ask the level above for its next matching entry and try to list it. */
file = next_match_recurse (e, depth - 1);
if (file) {
/* The file is stored in the state; it is unreffed below or in
 * enumerator_free(). */
e->children[depth] = file;
e->enumerators[depth] = g_file_enumerate_children (file,
G_FILE_ATTRIBUTE_STANDARD_NAME,
G_FILE_QUERY_INFO_NONE,
NULL,
NULL);
}
}
/* Reached at depth 0 once the root is exhausted, when the level above
 * has no more matches, or when the enumerator could not be created. */
if (e->enumerators[depth] == NULL)
return NULL;
}
/* Scan the open directory for the next entry with a matching name. */
while ((info = g_file_enumerator_next_file (e->enumerators[depth], NULL, NULL))) {
name = g_file_info_get_name (info);
if (component_match (e, depth, name)) {
/* Build the result before dropping info, since name is read from it. */
file = g_file_get_child (e->children[depth], name);
g_object_unref (info);
return file;
}
g_object_unref (info);
}
/* Directory exhausted: close it and loop to fetch the next candidate
 * directory from the level above. */
g_object_unref (e->enumerators[depth]);
e->enumerators[depth] = NULL;
g_object_unref (e->children[depth]);
e->children[depth] = NULL;
}
}
/*
 * enumerator_next:
 * @e: the enumerator state
 *
 * Returns: the next match for the last path component, as produced by
 *          next_match_recurse(), or NULL when that yields nothing.
 */
static GFile *
enumerator_next (Enumerator *e)
{
  gint deepest = e->depth - 1;

  return next_match_recurse (e, deepest);
}
/*
 * enumerator_new:
 * @root:        directory in which the search starts
 * @path:        path to look for, split on G_DIR_SEPARATOR_S
 * @ignore_case: whether components may also match case-insensitively
 *
 * Creates the iteration state for finding @path below @root and opens the
 * enumerator for the first level.
 *
 * Returns: a new Enumerator; release it with enumerator_free().
 */
static Enumerator *
enumerator_new (GFile      *root,
                const char *path,
                gboolean    ignore_case)
{
  Enumerator *self = g_new0 (Enumerator, 1);
  gint idx;

  self->ignore_case = ignore_case;
  self->path = g_strdup (path);
  self->components = g_strsplit (self->path, G_DIR_SEPARATOR_S, -1);
  self->depth = g_strv_length (self->components);

  if (self->ignore_case) {
    /* One extra slot keeps the vector NULL-terminated for g_strfreev(). */
    self->case_components = g_new0 (char *, self->depth + 1);

    idx = 0;
    while (self->components[idx] != NULL) {
      gchar *folded = g_utf8_casefold (self->components[idx], -1);

      self->case_components[idx] = g_utf8_collate_key (folded, -1);
      g_free (folded);
      idx++;
    }
  }

  self->children = g_new0 (GFile *, self->depth);
  self->enumerators = g_new0 (GFileEnumerator *, self->depth);

  /* NOTE(review): slot 0 is written unconditionally, so @path is assumed
   * to have at least one component - confirm against callers. */
  self->children[0] = g_object_ref (root);
  self->enumerators[0] = g_file_enumerate_children (root,
                                                    G_FILE_ATTRIBUTE_STANDARD_NAME,
                                                    G_FILE_QUERY_INFO_NONE,
                                                    NULL,
                                                    NULL);

  return self;
}
/*
 * enumerator_free:
 * @e: the enumerator to destroy
 *
 * Drops the references still held on per-level enumerators and
 * directories, then frees the arrays, the component vectors, the path
 * copy and @e itself.
 */
static void
enumerator_free (Enumerator *e)
{
  gint level = 0;

  while (level < e->depth) {
    if (e->enumerators[level] != NULL)
      g_object_unref (e->enumerators[level]);
    if (e->children[level] != NULL)
      g_object_unref (e->children[level]);
    level++;
  }

  g_free (e->enumerators);
  g_free (e->children);

  g_strfreev (e->components);
  if (e->case_components != NULL)
    g_strfreev (e->case_components);

  g_free (e->path);
  g_free (e);
}
static gboolean
matchlet_match (TreeMatchlet *matchlet,
GFile *root)
{
GFile *file;
GFileInfo *info;
gboolean result;
const gchar *attrs;
Enumerator *e;