Merge branch 'emoji-data-cldr' into 'master'

Update Emoji data

Closes #1511 and #950

See merge request GNOME/gtk!2656
This commit is contained in:
Matthias Clasen
2020-10-04 13:56:40 +00:00
17 changed files with 294 additions and 50053 deletions

23
gtk/emoji/README.md Normal file
View File

@@ -0,0 +1,23 @@
Emoji data
==========
We use Emoji data from Unicode and the CLDR, stored in a GVariant.
The immediate source for our data is the json files from
https://github.com/milesj/emojibase.git
To convert the data from that repository to a GVariant that GTK
can use, the convert-emoji tool can be used:
convert-emoji $emojibase/packages/data/de/data.raw.json de.data
for example (for German).
To make these usable by GTK, we wrap each of them in a resource bundle
that contains the GVariant at the resource path
/org/gtk/libgtk/emoji/de.data
and install the resulting resource bundle at this location:
/usr/share/gtk-4.0/emoji/de.gresource

View File

@@ -18,18 +18,40 @@
/* Build with gcc -o convert-emoji convert-emoji.c `pkg-config --cflags --libs json-glib-1.0`
*/
/* Reads data from the json files in emojibase, expecting
* language-specific data.raw.json as input
*/
/* The format of the generated data is: a(ausasu).
* Each member of the array has the following fields:
* au - sequence of unicode codepoints. If the
* sequence contains a 0, it marks the point
* where skin tone modifiers should be inserted
* s - name, e.g. "man worker"
* as - keywords, e.g. "man", "worker"
* u - the group that this item belongs to:
* 0: smileys-emotion
* 1: people-body
* 2: component
* 3: animals-nature
* 4: food-drink
* 5: travel-places
* 6: activities
* 7: objects
* 8: symbols
* 9: flags
*/
#include <json-glib/json-glib.h>
#include <string.h>
gboolean
parse_code (GVariantBuilder *b,
const char *code,
GString *name_key)
const char *code)
{
g_auto(GStrv) strv = NULL;
int j;
strv = g_strsplit (code, " ", -1);
strv = g_strsplit (code, "-", -1);
for (j = 0; strv[j]; j++)
{
guint32 u;
@@ -44,12 +66,7 @@ parse_code (GVariantBuilder *b,
if (0x1f3fb <= u && u <= 0x1f3ff)
g_variant_builder_add (b, "u", 0);
else
{
g_variant_builder_add (b, "u", u);
if (j > 0)
g_string_append_c (name_key, '-');
g_string_append_printf (name_key, "%x", u);
}
g_variant_builder_add (b, "u", u);
}
return TRUE;
@@ -60,57 +77,27 @@ main (int argc, char *argv[])
{
JsonParser *parser;
JsonNode *root;
JsonArray *array;
JsonObject *ro;
JsonArray *array;
JsonNode *node;
const char *name;
const char *unicode;
JsonObjectIter iter;
GError *error = NULL;
guint length, i;
GVariantBuilder builder;
GVariant *v;
GString *s;
GBytes *bytes;
GHashTable *names;
GString *name_key;
if (argc != 4)
if (argc != 3)
{
g_print ("Usage: emoji-convert INPUT INPUT1 OUTPUT\n");
g_print ("Usage: emoji-convert INPUT OUTPUT\n");
return 1;
}
parser = json_parser_new ();
if (!json_parser_load_from_file (parser, argv[2], &error))
{
g_error ("%s", error->message);
return 1;
}
root = json_parser_get_root (parser);
ro = json_node_get_object (root);
json_object_iter_init (&iter, ro);
names = g_hash_table_new_full (g_str_hash, g_str_equal, g_free, g_free);
name_key = g_string_new ("");
while (json_object_iter_next (&iter, &name, &node))
{
JsonObject *obj = json_node_get_object (node);
const char *unicode;
const char *shortname;
unicode = json_object_get_string_member (obj, "unicode");
shortname = json_object_get_string_member (obj, "shortname");
g_hash_table_insert (names, g_strdup (unicode), g_strdup (shortname));
}
g_object_unref (parser);
parser = json_parser_new ();
if (!json_parser_load_from_file (parser, argv[1], &error))
{
g_error ("%s", error->message);
@@ -121,65 +108,74 @@ main (int argc, char *argv[])
array = json_node_get_array (root);
length = json_array_get_length (array);
g_variant_builder_init (&builder, G_VARIANT_TYPE ("a(auss)"));
i = 0;
while (i < length)
g_variant_builder_init (&builder, G_VARIANT_TYPE ("a(ausasu)"));
for (i = 0; i < length; i++)
{
JsonNode *node = json_array_get_element (array, i);
JsonObject *obj = json_node_get_object (node);
JsonObject *obj = json_array_get_object_element (array, i);
GVariantBuilder b1;
GVariantBuilder b2;
guint group;
const char *name;
const char *shortname;
char *code;
int j;
gboolean skip;
gboolean has_variations;
i++;
if (!json_object_has_member (obj, "group"))
continue;
group = json_object_get_int_member (obj, "group");
name = json_object_get_string_member (obj, "annotation");
if (json_object_has_member (obj, "skins"))
{
JsonArray *a2 = json_object_get_array_member (obj, "skins");
JsonNode *n2 = json_array_get_element (a2, 0);
JsonObject *o2 = json_node_get_object (n2);
code = g_strdup (json_object_get_string_member (o2, "hexcode"));
}
else
{
code = g_strdup (json_object_get_string_member (obj, "hexcode"));
}
g_variant_builder_init (&b1, G_VARIANT_TYPE ("au"));
name = json_object_get_string_member (obj, "name");
code = g_strdup (json_object_get_string_member (obj, "code"));
has_variations = FALSE;
while (i < length)
{
JsonNode *node2 = json_array_get_element (array, i);
JsonObject *obj2 = json_node_get_object (node2);
const char *name2;
const char *code2;
name2 = json_object_get_string_member (obj2, "name");
code2 = json_object_get_string_member (obj2, "code");
if (!strstr (name2, "skin tone") || !g_str_has_prefix (name2, name))
break;
if (!has_variations)
{
has_variations = TRUE;
g_free (code);
code = g_strdup (code2);
}
i++;
}
g_string_set_size (name_key, 0);
if (!parse_code (&b1, code, name_key))
if (!parse_code (&b1, code))
return 1;
shortname = g_hash_table_lookup (names, name_key->str);
g_variant_builder_init (&b2, G_VARIANT_TYPE ("as"));
if (json_object_has_member (obj, "tags"))
{
JsonArray *tags = json_object_get_array_member (obj, "tags");
for (int j = 0; j < json_array_get_length (tags); j++)
g_variant_builder_add (&b2, "s", json_array_get_string_element (tags, j));
}
g_variant_builder_add (&builder, "(auss)", &b1, name, shortname ? shortname : "");
g_variant_builder_add (&builder, "(ausasu)", &b1, name, &b2, group);
}
v = g_variant_builder_end (&builder);
bytes = g_variant_get_data_as_bytes (v);
if (!g_file_set_contents (argv[3], g_bytes_get_data (bytes, NULL), g_bytes_get_size (bytes), &error))
if (g_str_has_suffix (argv[2], ".json"))
{
g_error ("%s", error->message);
return 1;
JsonNode *node;
char *out;
node = json_gvariant_serialize (v);
out = json_to_string (node, TRUE);
if (!g_file_set_contents (argv[2], out, -1, &error))
{
g_error ("%s", error->message);
return 1;
}
}
else
{
GBytes *bytes;
bytes = g_variant_get_data_as_bytes (v);
if (!g_file_set_contents (argv[2], g_bytes_get_data (bytes, NULL), g_bytes_get_size (bytes), &error))
{
g_error ("%s", error->message);
return 1;
}
}
return 0;

BIN
gtk/emoji/de.data Normal file

Binary file not shown.

Binary file not shown.

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

BIN
gtk/emoji/en.data Normal file

Binary file not shown.

BIN
gtk/emoji/es.data Normal file

Binary file not shown.

BIN
gtk/emoji/fr.data Normal file

Binary file not shown.

View File

@@ -0,0 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- Resource bundle description for one language's Emoji data.
     The language placeholder in the file name below is presumably filled in
     by the build system once per language (TODO confirm against the
     build files). -->
<gresources>
<gresource prefix="/org/gtk/libgtk/emoji/">
<file>@lang@.data</file>
</gresource>
</gresources>

BIN
gtk/emoji/zh.data Normal file

Binary file not shown.

View File

@@ -86,7 +86,7 @@ for f in get_files('inspector', '.ui'):
xml += '''
<file>inspector/logo.png</file>
<file>inspector/inspector.css</file>
<file>emoji/emoji.data</file>
<file>emoji/en.data</file>
</gresource>
</gresources>'''

View File

@@ -38,6 +38,8 @@
#include "gtknative.h"
#include "gtkwidgetprivate.h"
#include "gdk/gdkprofilerprivate.h"
#include "gtkmain.h"
#include "gtkprivate.h"
/**
* SECTION:gtkemojichooser
@@ -184,7 +186,7 @@ typedef struct {
GtkWidget *box;
GtkWidget *heading;
GtkWidget *button;
const char *first;
int group;
gunichar label;
gboolean empty;
} EmojiSection;
@@ -337,8 +339,8 @@ add_recent_item (GtkEmojiChooser *chooser,
g_variant_ref (item);
g_variant_builder_init (&builder, G_VARIANT_TYPE ("a((auss)u)"));
g_variant_builder_add (&builder, "(@(auss)u)", item, modifier);
g_variant_builder_init (&builder, G_VARIANT_TYPE ("a((ausasu)u)"));
g_variant_builder_add (&builder, "(@(ausasu)u)", item, modifier);
children = NULL;
for (child = gtk_widget_get_last_child (chooser->recent.box);
@@ -363,7 +365,7 @@ add_recent_item (GtkEmojiChooser *chooser,
continue;
}
g_variant_builder_add (&builder, "(@(auss)u)", item2, modifier2);
g_variant_builder_add (&builder, "(@(ausasu)u)", item2, modifier2);
}
g_list_free (children);
@@ -591,6 +593,79 @@ add_emoji (GtkWidget *box,
gtk_flow_box_insert (GTK_FLOW_BOX (box), child, prepend ? 0 : -1);
}
/* get_emoji_data:
 *
 * Looks up the Emoji data for the default language.
 *
 * The language code is taken from gtk_get_default_language(); if it
 * contains a '-', only the part before it is used (at most 9 characters).
 * The data is then searched for in this order:
 *
 *  1. the already-registered resource /org/gtk/libgtk/emoji/LANG.data
 *  2. the resource bundle /usr/share/gtk-4.0/emoji/LANG.gresource, which
 *     is registered on the fly and then queried for the same path
 *  3. the English data at /org/gtk/libgtk/emoji/en.data
 *
 * NOTE(review): the bundle directory is hard-coded here; confirm that it
 * matches the directory the build installs the bundles into.
 *
 * Returns: the result of the first lookup that succeeds, or of the
 *   en.data lookup otherwise. Callers in this file release it with
 *   g_bytes_unref().
 */
GBytes *
get_emoji_data (void)
{
  GBytes *bytes;
  const char *lang;
  char q[10];
  char *path;
  GError *error = NULL;

  lang = pango_language_to_string (gtk_get_default_language ());
  if (strchr (lang, '-'))
    {
      int i;

      /* Keep only the part before the first '-', bounded by the size of q */
      for (i = 0; lang[i] != '-' && i < 9; i++)
        q[i] = lang[i];
      q[i] = '\0';
      lang = q;
    }

  path = g_strconcat ("/org/gtk/libgtk/emoji/", lang, ".data", NULL);
  bytes = g_resources_lookup_data (path, 0, &error);
  if (bytes)
    {
      g_debug ("Found emoji data for %s in resource %s", lang, path);
      g_free (path);
      return bytes;
    }

  if (g_error_matches (error, G_RESOURCE_ERROR, G_RESOURCE_ERROR_NOT_FOUND))
    {
      char *filename;
      GMappedFile *file;

      g_clear_error (&error);

      filename = g_strconcat ("/usr/share/gtk-4.0/emoji/", lang, ".gresource", NULL);
      file = g_mapped_file_new (filename, FALSE, NULL);
      if (file)
        {
          GBytes *data;
          GResource *resource;

          data = g_mapped_file_get_bytes (file);
          g_mapped_file_unref (file);

          resource = g_resource_new_from_data (data, NULL);
          g_bytes_unref (data);

          /* The file may exist without being a valid resource bundle; in
           * that case there is nothing to register and we fall through to
           * the English data instead of handing NULL to GLib.
           */
          if (resource)
            {
              g_debug ("Registering resource for Emoji data for %s from file %s", lang, filename);
              g_resources_register (resource);
              g_resource_unref (resource);

              bytes = g_resources_lookup_data (path, 0, NULL);
              if (bytes)
                {
                  g_debug ("Found emoji data for %s in resource %s", lang, path);
                  g_free (path);
                  g_free (filename);
                  return bytes;
                }
            }
        }

      g_free (filename);
    }

  g_clear_error (&error);
  g_free (path);

  return g_resources_lookup_data ("/org/gtk/libgtk/emoji/en.data", 0, NULL);
}
static gboolean
populate_emoji_chooser (gpointer data)
{
@@ -602,8 +677,11 @@ populate_emoji_chooser (gpointer data)
if (!chooser->data)
{
GBytes *bytes = g_resources_lookup_data ("/org/gtk/libgtk/emoji/emoji.data", 0, NULL);
chooser->data = g_variant_ref_sink (g_variant_new_from_bytes (G_VARIANT_TYPE ("a(auss)"), bytes, TRUE));
GBytes *bytes;
bytes = get_emoji_data ();
chooser->data = g_variant_ref_sink (g_variant_new_from_bytes (G_VARIANT_TYPE ("a(ausasu)"), bytes, TRUE));
g_bytes_unref (bytes);
}
@@ -615,25 +693,27 @@ populate_emoji_chooser (gpointer data)
while ((item = g_variant_iter_next_value (chooser->iter)))
{
const char *name;
guint group;
g_variant_get_child (item, 1, "&s", &name);
g_variant_get_child (item, 3, "u", &group);
if (strcmp (name, chooser->body.first) == 0)
if (group == chooser->people.group)
chooser->box = chooser->people.box;
else if (group == chooser->body.group)
chooser->box = chooser->body.box;
else if (strcmp (name, chooser->nature.first) == 0)
else if (group == chooser->nature.group)
chooser->box = chooser->nature.box;
else if (strcmp (name, chooser->food.first) == 0)
else if (group == chooser->food.group)
chooser->box = chooser->food.box;
else if (strcmp (name, chooser->travel.first) == 0)
else if (group == chooser->travel.group)
chooser->box = chooser->travel.box;
else if (strcmp (name, chooser->activities.first) == 0)
else if (group == chooser->activities.group)
chooser->box = chooser->activities.box;
else if (strcmp (name, chooser->objects.first) == 0)
else if (group == chooser->objects.group)
chooser->box = chooser->objects.box;
else if (strcmp (name, chooser->symbols.first) == 0)
else if (group == chooser->symbols.group)
chooser->box = chooser->symbols.box;
else if (strcmp (name, chooser->flags.first) == 0)
else if (group == chooser->flags.group)
chooser->box = chooser->flags.box;
add_emoji (chooser->box, FALSE, item, 0, chooser);
@@ -710,6 +790,31 @@ adj_value_changed (GtkAdjustment *adj,
}
}
static gboolean
match_tokens (const char **term_tokens,
const char **hit_tokens)
{
int i, j;
gboolean matched;
matched = TRUE;
for (i = 0; term_tokens[i]; i++)
{
for (j = 0; hit_tokens[j]; j++)
if (g_str_has_prefix (hit_tokens[j], term_tokens[i]))
goto one_matched;
matched = FALSE;
break;
one_matched:
continue;
}
return matched;
}
static gboolean
filter_func (GtkFlowBoxChild *child,
gpointer data)
@@ -719,6 +824,9 @@ filter_func (GtkFlowBoxChild *child,
GVariant *emoji_data;
const char *text;
const char *name;
const char **keywords;
char **term_tokens;
char **name_tokens;
gboolean res;
res = TRUE;
@@ -733,8 +841,17 @@ filter_func (GtkFlowBoxChild *child,
if (!emoji_data)
goto out;
term_tokens = g_str_tokenize_and_fold (text, "en", NULL);
g_variant_get_child (emoji_data, 1, "&s", &name);
res = g_str_match_string (text, name, TRUE);
name_tokens = g_str_tokenize_and_fold (name, "en", NULL);
g_variant_get_child (emoji_data, 2, "^a&s", &keywords);
res = match_tokens ((const char **)term_tokens, (const char **)name_tokens) ||
match_tokens ((const char **)term_tokens, keywords);
g_strfreev (term_tokens);
g_strfreev (name_tokens);
out:
if (res)
@@ -811,11 +928,11 @@ stop_search (GtkEntry *entry,
static void
setup_section (GtkEmojiChooser *chooser,
EmojiSection *section,
const char *first,
const char *icon)
EmojiSection *section,
int group,
const char *icon)
{
section->first = first;
section->group = group;
gtk_button_set_icon_name (GTK_BUTTON (section->button), icon);
@@ -861,16 +978,16 @@ gtk_emoji_chooser_init (GtkEmojiChooser *chooser)
adj = gtk_scrolled_window_get_vadjustment (GTK_SCROLLED_WINDOW (chooser->scrolled_window));
g_signal_connect (adj, "value-changed", G_CALLBACK (adj_value_changed), chooser);
setup_section (chooser, &chooser->recent, NULL, "emoji-recent-symbolic");
setup_section (chooser, &chooser->people, "grinning face", "emoji-people-symbolic");
setup_section (chooser, &chooser->body, "selfie", "emoji-body-symbolic");
setup_section (chooser, &chooser->nature, "monkey face", "emoji-nature-symbolic");
setup_section (chooser, &chooser->food, "grapes", "emoji-food-symbolic");
setup_section (chooser, &chooser->travel, "globe showing Europe-Africa", "emoji-travel-symbolic");
setup_section (chooser, &chooser->activities, "jack-o-lantern", "emoji-activities-symbolic");
setup_section (chooser, &chooser->objects, "muted speaker", "emoji-objects-symbolic");
setup_section (chooser, &chooser->symbols, "ATM sign", "emoji-symbols-symbolic");
setup_section (chooser, &chooser->flags, "chequered flag", "emoji-flags-symbolic");
setup_section (chooser, &chooser->recent, -1, "emoji-recent-symbolic");
setup_section (chooser, &chooser->people, 0, "emoji-people-symbolic");
setup_section (chooser, &chooser->body, 1, "emoji-body-symbolic");
setup_section (chooser, &chooser->nature, 3, "emoji-nature-symbolic");
setup_section (chooser, &chooser->food, 4, "emoji-food-symbolic");
setup_section (chooser, &chooser->travel, 5, "emoji-travel-symbolic");
setup_section (chooser, &chooser->activities, 6, "emoji-activities-symbolic");
setup_section (chooser, &chooser->objects, 7, "emoji-objects-symbolic");
setup_section (chooser, &chooser->symbols, 8, "emoji-symbols-symbolic");
setup_section (chooser, &chooser->flags, 9, "emoji-flags-symbolic");
populate_recent_section (chooser);

View File

@@ -115,7 +115,7 @@ next:
break;
}
}
while (g_unichar_isalnum (g_utf8_get_char (p)) || *p == '_');
while (g_unichar_isalnum (g_utf8_get_char (p)) || *p == '_' || *p == ' ');
if (found_candidate)
n_matches = populate_completion (completion, p, 0);
@@ -488,7 +488,7 @@ add_emoji (GtkWidget *list,
GtkWidget *box;
PangoAttrList *attrs;
char text[64];
const char *shortname;
const char *name;
GtkWidget *stack;
gunichar modifier;
@@ -515,8 +515,8 @@ add_emoji (GtkWidget *list,
gtk_box_append (GTK_BOX (box), stack);
g_object_set_data (G_OBJECT (child), "stack", stack);
g_variant_get_child (emoji_data, 2, "&s", &shortname);
label = gtk_label_new (shortname);
g_variant_get_child (emoji_data, 1, "&s", &name);
label = gtk_label_new (name);
gtk_label_set_xalign (GTK_LABEL (label), 0);
gtk_stack_add_named (GTK_STACK (stack), label, "text");
@@ -572,10 +572,11 @@ populate_completion (GtkEmojiCompletion *completion,
g_variant_iter_init (&iter, completion->data);
while ((item = g_variant_iter_next_value (&iter)))
{
const char *shortname;
const char *name;
g_variant_get_child (item, 2, "&s", &shortname);
if (g_str_has_prefix (shortname, text))
g_variant_get_child (item, 1, "&s", &name);
if (g_str_has_prefix (name, text + 1))
{
n_matches++;
@@ -622,8 +623,8 @@ gtk_emoji_completion_init (GtkEmojiCompletion *completion)
gtk_widget_init_template (GTK_WIDGET (completion));
bytes = g_resources_lookup_data ("/org/gtk/libgtk/emoji/emoji.data", 0, NULL);
completion->data = g_variant_ref_sink (g_variant_new_from_bytes (G_VARIANT_TYPE ("a(auss)"), bytes, TRUE));
bytes = get_emoji_data ();
completion->data = g_variant_ref_sink (g_variant_new_from_bytes (G_VARIANT_TYPE ("a(ausasu)"), bytes, TRUE));
g_bytes_unref (bytes);

View File

@@ -126,6 +126,8 @@ void gtk_set_display_debug_flags (GdkDisplay *display,
guint gtk_get_display_debug_flags (GdkDisplay *display);
gboolean gtk_get_any_display_debug_flag_set (void);
GBytes *get_emoji_data (void);
#ifdef G_ENABLE_DEBUG
#define GTK_DISPLAY_DEBUG_CHECK(display,type) (gtk_get_any_display_debug_flag_set () && G_UNLIKELY (gtk_get_display_debug_flags (display) & GTK_DEBUG_##type))

View File

@@ -861,6 +861,23 @@ gtkresources = gnome.compile_resources('gtkresources',
extra_args: '--manual-register',
)
# For each additional Emoji language, generate a <lang>.gresource.xml from
# the emoji/gresource.xml.in template (with 'lang' substituted) and compile
# it into a standalone resource bundle that is installed under
# <gtk_datadir>/gtk-4.0/emoji.
foreach lang : [ 'de', 'fr', 'es', 'zh' ]
conf = configuration_data()
conf.set('lang', lang)
resxml = configure_file(input: 'emoji/gresource.xml.in',
output: lang + '.gresource.xml',
configuration: conf
)
gnome.compile_resources(lang,
resxml,
source_dir: 'emoji',
gresource_bundle: true,
install: true,
install_dir: join_paths(gtk_datadir, 'gtk-4.0', 'emoji')
)
endforeach
gtk_x11_sources = files([
'gtkapplication-x11.c',
'gtkmountoperation-x11.c',

View File

@@ -2,14 +2,14 @@
<schemalist>
<schema id='org.gtk.gtk4.Settings.EmojiChooser' path='/org/gtk/gtk4/settings/emoji-chooser/'>
<key name='recent-emoji' type='a((auss)u)'>
<key name='recent-emoji' type='a((ausasu)u)'>
<default>[]</default>
<summary>Recently used Emoji</summary>
<description>
An array of Emoji definitions to show in the Emoji chooser. Each Emoji is
specified as an array of codepoints, name and shortname. The extra integer after this
pair is the code of the Fitzpatrick modifier to use in place of a 0 in the
codepoint array.
specified as an array of codepoints, name, keywords and group. The extra
integer after this tuple is the code of the Fitzpatrick modifier to use in
place of a 0 in the codepoint array.
</description>
</key>
</schema>