1 #!/usr/bin/env ruby
2 require 'json'
3 require 'set'
4
5 # Input format (https://emojibase.dev/) is:
6 #
7 # [
8 # {
9 # "label": "greedy skull farmer",
10 # "hexcode": "000000",
11 # "emoji": "X",
12 # "text": "",
13 # "type": 1,
# "version": 0,
15 # "tags": ["greed","skull","farming",...]
16 # ...
17 # },
18 # ...
19 # ]
20 #
21 # This script removes many Emoji based on a variety of criteria:
22 #
23 # * Partial matching label names
24 # * Groups
25 # * Unicode versions
26 #
27 # Customize to your needs.
28
# Slurp everything ARGF offers: the files named on the command line, or stdin
# when no filenames are given.
json_in = ARGF.read

# Bail out when there is nothing to parse. `to_s` keeps the check from raising
# should the read ever yield nil rather than an empty string. `abort` prints
# the message to stderr and exits with status 1, so the complaint is never
# mixed into the JSON this script writes to stdout.
abort 'Oops, need JSON data. Pipe in or supply filename.' if json_in.to_s.empty?

# The filtering steps below iterate over this value and index each element by
# string keys such as 'label', 'group' and 'version'.
list = JSON.parse(json_in)
37
38 # For the curious:
39 # This script often makes a 'newlist' between steps. This makes sense when you
40 # realize that these steps used to be a series of *individual* scripts - so
41 # this is still many times more efficient than spawning separate processes and
42 # serializing/deserializing JSON between steps!
43
44 # =============================================================================
45 # Group stuff
46 #
47 # Official group names:
48 # 0 Smileys & Emotion
49 # 1 People & Body
50 # 2 Components
51 # 3 Animals & Nature
52 # 4 Food & Drink
53 # 5 Travel & Places
54 # 6 Activities
55 # 7 Objects
56 # 8 Symbols
57 # 9 Flags
58
59
# Drop the regional indicator letters. They are not intended for stand-alone
# use, and I have seen firsthand that stand-alone support for them is spotty
# (at least for now).
list = list.reject { |entry| entry['label'].match?(/^regional indicator/) }
71
72 # Previously:
73 # Put regional indicator letters with the symbols group and re-sort.
74 # Add "letter" to the tags list
75 #list.each do |e|
76 # if e['label'].match(/^regional indicator/)
77 # e['group'] = 8 # "Symbols"
78 # if e['tags']
79 # e['tags'].push 'letter'
80 # else
81 # e['tags'] = ['letter']
82 # end
83 # end
84 #end
85 #newlist = list.sort_by! { |l| l["group"] }
86 #list = newlist
87
# Drop every entry whose label mentions "facing right".
list = list.reject do |entry|
  entry['label'].match?(/facing right/)
end
97
# Drop the keycap entries (labels containing "keycap:").
keycap_label = /keycap:/
list = list.select { |entry| !entry['label'].match?(keycap_label) }
107
# Drop the family entries - there are a great many of them and I have never
# had a use for one.
list = list.reject { |entry| entry['label'].match?(/family:/) }
117
# Drop the gendered entries (insert your own tasteful joke here): any label
# containing "person", "man" or "woman" followed by an optional colon and then
# a space.
#
# NOTE(review): the pattern is unanchored, so "man " at the end of a longer
# word matches too (a label such as "snowman without snow" would be dropped).
# Confirm that is intended.
gendered_label = /(person|(wo)?man):? /
list = list.reject do |entry|
  entry['label'].match?(gendered_label)
end
127
# Drop the Japanese language elements (labels beginning with "Japanese");
# my audience does not speak the language.
list = list.select { |entry| !entry['label'].match?(/^Japanese/) }
137
# Drop everything in group 2 (Components).
components_group = 2
list = list.reject { |entry| entry['group'] == components_group }
147
# Drop everything in group 9 (Flags). They are cool, but there are just too
# many of them.
flags_group = 9
list = list.reject { |entry| entry['group'] == flags_group }
157
# Sadly, entries with a version greater than 13 have to go as well, even
# though that discards some great emoji: my use case does not support them
# yet.
newest_supported_version = 13
list = list.reject do |entry|
  entry['version'] > newest_supported_version
end
169
# There are only 49 unique uppercase words, so I don't think the data
# savings are worth the loss of readability.
172 #
173 # Downcase labels
174 #list.each do |e|
175 # e["label"].downcase!
176 #end
177 #
178 # Downcase (and de-dupe) tags
179 #list.each do |e|
180 # e["tags"] = e["tags"].map(&:downcase).to_set.to_a
181 #end
182
# Slim each entry down to the four fields wanted in the output; every other
# field is left behind.
list = list.map do |entry|
  {
    label: entry['label'],
    emoji: entry['emoji'],
    group: entry['group'],
    tags: entry['tags']
  }
end
193
# Emit the finished list on stdout as compact, single-line JSON.
# For indented, human-readable output use this line instead:
#puts JSON.pretty_generate(list)
$stdout.puts(JSON.generate(list))