1 #!/usr/bin/env ruby
2 require 'json'
3 require 'set'
4
5 # JSON file needs Emojibase-formatted entries, see https://emojibase.dev
6 # Also see customizer.rb in this directory.
7
# Slurp the entire input through ARGF and decode it as JSON.
# The raw text stays available in json_in; the parsed entries in my_emoji.
json_in  = ARGF.read
my_emoji = JSON.parse(json_in)
11
12 # Output target format example (taken from original dev test data)
13 #
14 # FC.data = {
15 # groups: [
16 # { title: "People" , emoji: "ð", range: [0,2] },
17 # { title: "Natural" , emoji: "ðī", range: [4,5] },
18 # { title: "Activity", emoji: "ð§", range: [6,6] },
19 # { title: "Things" , emoji: "ðŧïļ", range: [0,0] },
20 # ],
21 # tags: {
22 # "face": [0,1,2,4],
23 # "wacky": [1],
24 # "cool": [2],
25 # "bear": [3,6],
26 # "pig": [4],
27 # "owl": [5],
28 # "animal": [3,4,5],
29 # "teddy": [6],
30 # },
31 # emoji: [
# "😀", // 0 grinning face
# "🤪", // 1 wacky face
# "😎", // 2 cool face with sunglasses
# "🐻", // 3 bear
# "🐷", // 4 pig face
# "🦉", // 5 owl
# "🧸", // 6 teddy bear
39 # ],
40 # };
41
42 # Begin!
# Open the JS object literal that the rest of the script fills in.
puts "FC.data = {"

# Group list. To save screen real estate, several official groups are folded
# into each entry (from_groups). These entries become the selectable "tab"
# filters in the final interface.
#
# Official group numbers:
#   0 Smileys & Emotion
#   1 People & Body
#   2 Components
#   3 Animals & Nature
#   4 Food & Drink
#   5 Travel & Places
#   6 Activities
#   7 Objects
#   8 Symbols
#   9 Flags
#
# Every entry starts with range [nil, 0]: the first slot is filled in when the
# group's first emoji is seen, the second tracks the highest index seen.
my_groups = [
  # title,     tab emoji, official groups folded in
  ["People",   "ð",      [0, 1]],
  ["Natural",  "ðī",     [3, 4]],
  ["Activity", "ð§",     [5, 6]],
  ["Things",   "ðŧïļ",   [7, 8]],
].map do |title, emoji, from_groups|
  { title: title, emoji: emoji, from_groups: from_groups, range: [nil, 0] }
end
65
# Record, for each combined group, the index of its first and last emoji.
my_emoji.each_with_index do |entry, idx|
  # Which combined group (if any) folds in this entry's official group?
  owner = my_groups.find { |grp| grp[:from_groups].include?(entry["group"]) }
  next unless owner

  span = owner[:range]
  span[0] = idx if span[0].nil? # first emoji seen for this group
  span[1] = idx if idx > span[1] # latest candidate for the last one
end
81
# Print the groups. The whole structure is deliberately not handed to
# JSON.generate in one go: emitting one group per line gives explicit control
# over the pretty-printing (compactness vs. readability). Because the output
# is really JS rather than strict JSON, trailing commas and the like are
# allowed, which simplifies things quite a bit.
group_strs = my_groups.map do |group|
  group.delete(:from_groups) # build-time only; not part of the output
  " " + JSON.generate(group)
end
puts "groups: [\n#{group_strs.join(",\n")}\n],"
94
# Drop every tag whose text already occurs in the entry's label
# (plain substring match against the whole label).
my_emoji.each do |entry|
  entry["tags"] = entry["tags"].reject { |tag| entry["label"].include?(tag) }
end
102
# Count how many entries use each "word", where an entry's words are its tags
# plus the space-separated words of its label. A word is counted at most once
# per entry, hence the Set.
word_usage = {}
my_emoji.each do |entry|
  distinct_words = entry["tags"].to_set.merge(entry["label"].split(' '))
  distinct_words.each do |w|
    word_usage[w] = word_usage.fetch(w, 0) + 1
  end
end
117
118
# Word parameters to adjust for best results.
# Both of these will work with 1 or higher. Each default (4) is replaced by
# the integer value of the matching environment variable when that is set.
min_word_usage_count = ENV.fetch('MIN_WORD_USAGE_COUNT', 4).to_i
min_word_length      = ENV.fetch('MIN_WORD_LENGTH', 4).to_i
131
132 # TODO allow input of the above in ARGV to override the defaults so I can
133 # automate the 25 or so permutations and get the output bytes for comparison to
134 # see which one is smallest. THEN change the defaults to match!
135
# Turn the usage hash into [word, count] pairs, keeping only the words that
# meet both thresholds.
word_usage_list = word_usage.to_a.select do |word, count|
  count >= min_word_usage_count && word.length >= min_word_length
end

# Most frequently used words first, so they get the lowest index numbers
# (literally just for the savings of a shorter number of digits).
word_usage_list = word_usage_list.sort_by { |pair| pair[1] }.reverse

# The final word list is just the words, without their counts.
my_words = word_usage_list.map(&:first)
151
# Print the word list as one space-separated JS string, split into several
# single-quoted pieces joined with "+" so that no source line grows too long.
#
# The words land inside single-quoted JS string literals, so a backslash or a
# single quote in a word must be backslash-escaped; otherwise the generated
# file would not parse.
escape_js_word = ->(text) { text.gsub(/[\\']/) { |ch| "\\#{ch}" } }

line_len = 0
print "words: '"
my_words.each_with_index do |w, i|
  if line_len + w.length > 70
    # don't add to a long line, start a new one
    print "'\n+'"
    line_len = 0
  end
  # The separator goes before every word except the very first, so a
  # continuation piece begins with the space that joins it to the last one.
  print ' ' unless i.zero?
  print escape_js_word.call(w)
  # Only the unescaped word length is counted (separators are not), which
  # keeps the wrapping points unchanged for words that need no escaping.
  line_len += w.length
end
puts "',"
170
# Encode labels and tags against the shared word list.
#
# A word that appears in my_words is replaced by a "$<index>" reference
# (e.g. $15, $256); any other word is kept verbatim.
#
# Surprisingly, there are NO tags or labels with '$' in them
# (see check_for_dollar_tags.rb)

# word => index lookup, built once so that each word costs a hash probe
# instead of a linear scan of the word list. The first occurrence wins,
# which matches what find_index would return.
word_index = {}
my_words.each_with_index { |word, i| word_index[word] ||= i }

# Returns the "$<index>" reference for a listed word, or the word itself.
encode_word = lambda do |word|
  idx = word_index[word]
  idx.nil? ? word : "$#{idx}"
end

# One encoded label string per emoji, in my_emoji order.
my_labels = my_emoji.map do |e|
  e['label'].split(' ').map(&encode_word).join(' ')
end

# Make tag reference list (one space-separated string per emoji):
# - exclude if tag can be found in the label
# - in the word list: use "$<index>" reference
# - not in word list: use verbatim string
my_tags = my_emoji.map do |e|
  kept_tags = e["tags"].reject do |t|
    next false unless e["label"].include?(t)

    # Report on stderr: stdout carries the generated JS and must stay clean.
    warn "already in label: #{t}"
    true
  end
  kept_tags.map(&encode_word).join(' ')
end
212
# Print emoji, collating in the labels and tags.
# Each entry is an array in this index order:
#   0: emoji glyph
#   1: label string
#   2: tag string (space-separated)
# Example: ['X','winking $0','$2 fart $17'],
#
# Every field is emitted as a single-quoted JS string literal, so backslashes
# and single quotes inside a value are backslash-escaped; left raw they would
# end the literal early and make the generated file unparseable.
quote_js = lambda do |text|
  escaped = text.to_s.gsub(/[\\']/) { |ch| "\\#{ch}" }
  "'#{escaped}'"
end

line_len = 0
puts "emoji: ["
my_emoji.each_with_index do |e, i|
  fields = [e['emoji'], my_labels[i], my_tags[i]].map(&quote_js)
  str = "[#{fields.join(',')}],"
  if line_len + str.length > 80
    # don't add to a long line, start a new one
    puts
    line_len = 0
  end
  print str
  line_len += str.length
end

puts
puts "] // End of FC.data.emoji"
puts "}; // End of FC.data"