169 lines
5.6 KiB
Go
169 lines
5.6 KiB
Go
package main
|
|
|
|
import (
|
|
"encoding/json"
|
|
"flag"
|
|
"fmt"
|
|
bleve "github.com/blevesearch/bleve/v2"
|
|
"github.com/go-zoox/fetch"
|
|
"github.com/sirupsen/logrus"
|
|
"os"
|
|
"strconv"
|
|
"strings"
|
|
)
|
|
|
|
var (
|
|
index bleve.Index
|
|
emojis []EmojiDescription
|
|
)
|
|
|
|
// usage writes the command-line synopsis and the flag defaults to standard
// error, then terminates the process with exit status 2.
func usage() {
	fmt.Fprint(os.Stderr, "usage: go run main.go query\n")
	flag.PrintDefaults()
	os.Exit(2)
}
|
|
|
|
func main() {
|
|
flag.Usage = usage
|
|
flag.Parse()
|
|
|
|
args := flag.Args()
|
|
if len(args) < 1 {
|
|
fmt.Println("Query missing")
|
|
usage()
|
|
os.Exit(1)
|
|
}
|
|
|
|
err := indexEmojies()
|
|
if err != nil {
|
|
logrus.WithError(err).Error("Could not index")
|
|
}
|
|
results := Search(strings.Join(os.Args[1:], " "))
|
|
|
|
for _, result := range results {
|
|
fmt.Printf("%s - %s\n", result.Emoji, result.Description)
|
|
}
|
|
}
|
|
|
|
// EmojiDescription is one entry of the emoji database. The JSON tags define
// how an entry is decoded from, and encoded to, JSON.
type EmojiDescription struct {
	// Emoji is the emoji itself; it may consist of several code points.
	Emoji string `json:"emoji"`
	// Description is the human-readable name printed next to the emoji.
	Description string `json:"description"`
	// Category is decoded from the "category" key.
	Category string `json:"category"`
	// Aliases are the alternative names decoded from the "aliases" key.
	Aliases []string `json:"aliases"`
	// Tags are decoded from the "tags" key.
	Tags []string `json:"tags"`
	// HasSkinTones reports whether skin-tone variations are generated for
	// this emoji.
	HasSkinTones bool `json:"skin_tones,omitempty"`
	// UnicodeVersion is decoded from the "unicode_version" key; it is left
	// empty on generated skin-tone variations.
	UnicodeVersion string `json:"unicode_version"`
}
|
|
|
|
// GithubDescriptionResponse is a named list of EmojiDescription values.
// NOTE(review): nothing in the visible source references this type; it
// presumably mirrors the JSON array downloaded from GitHub — confirm before
// relying on it or removing it.
type GithubDescriptionResponse []EmojiDescription
|
|
|
|
func fetchEmojiFromGithub() (results []EmojiDescription, err error) {
|
|
response, err := fetch.Get("https://raw.githubusercontent.com/github/gemoji/master/db/emoji.json")
|
|
if err != nil {
|
|
return
|
|
}
|
|
err = json.Unmarshal(response.Body, &results)
|
|
return
|
|
}
|
|
|
|
func indexEmojies() error {
|
|
// we create a new indexMaping. I used the default one that will index all fields of my EmojiDescription
|
|
mapping := bleve.NewIndexMapping()
|
|
// we create the index instance
|
|
bleveIndex, err := bleve.NewMemOnly(mapping)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
// we fetch the emoji from the internet. This can fail, and may be embeded for better performance
|
|
e, err := fetchEmojiFromGithub()
|
|
if err != nil {
|
|
logrus.WithError(err).Error("Could fetch emoji list")
|
|
return err
|
|
}
|
|
emojis = enhanceEmojiListWithVariations(e)
|
|
for eNumber, eDescription := range emojis {
|
|
// this will index each item one by one. No need to be quick here for me, I can wait few ms for the program to start.
|
|
err = bleveIndex.Index(fmt.Sprintf("%d", eNumber), eDescription)
|
|
if err != nil {
|
|
logrus.WithError(err).Error("Could not index an emoji")
|
|
}
|
|
}
|
|
index = bleveIndex // we make the index available
|
|
return nil
|
|
}
|
|
|
|
func Search(q string) (results []EmojiDescription) {
|
|
if index == nil {
|
|
// no Index mean indexEmojies was not called yet or did not finished. No results (boot process)
|
|
return
|
|
}
|
|
// we create a query as bleve expect.
|
|
query := bleve.NewQueryStringQuery(q)
|
|
// we define the search options and limit to 200 results. This should be enough.
|
|
searchrequest := bleve.NewSearchRequestOptions(query, 200, 0, false)
|
|
// we do the search itself. This is the longest. Approximately few hundreds of us
|
|
searchresults, err := index.Search(searchrequest)
|
|
if err != nil {
|
|
logrus.WithError(err).Error("Could not search for an emoji")
|
|
return
|
|
}
|
|
|
|
// If we have no results we try to do a basic fuzzy search
|
|
if len(searchresults.Hits) == 0 {
|
|
// this time, we create a fuzzy query. The rest is the same as before. CopyPasta style.
|
|
fuzzyQuery := bleve.NewFuzzyQuery(q)
|
|
searchrequest := bleve.NewSearchRequestOptions(fuzzyQuery, 200, 0, false)
|
|
searchresults, err = index.Search(searchrequest)
|
|
if err != nil {
|
|
logrus.WithError(err).Error("Could not search for emoji")
|
|
return
|
|
}
|
|
}
|
|
// we return the results. I use the index to find my original object stored in `emojis` because it's simpler. Optimisation possible.
|
|
for _, result := range searchresults.Hits {
|
|
numIndex, _ := strconv.ParseInt(result.ID, 10, 64)
|
|
results = append(results, emojis[numIndex])
|
|
}
|
|
return
|
|
}
|
|
func enhanceEmojiListWithVariations(list []EmojiDescription) []EmojiDescription {
|
|
tones := map[string][]rune{
|
|
"light skin tone": []rune("\U0001F3FB"),
|
|
"medium-light skin tone": []rune("\U0001F3FC"),
|
|
"medium skin tone": []rune("\U0001F3FD"),
|
|
"medium-dark skin tone": []rune("\U0001F3FE"),
|
|
"dark skin tone": []rune("\U0001F3FF"),
|
|
}
|
|
for _, originalEmoji := range list {
|
|
// we only add variations for emoji that supports it
|
|
if originalEmoji.HasSkinTones {
|
|
// we do it for every skin tone
|
|
for skinToneName, tone := range tones {
|
|
// we make a copy of the emojiDescription
|
|
currentEmojiWithSkinTone := originalEmoji
|
|
|
|
// This is the important bit that took me hours to figure out
|
|
// we convert the emoji in rune (string -> []rune). An emoji can already be composed of multiple sub UTF8 characters, therefore multiple runes.
|
|
// we append to the list of runes the one for the skin tone.
|
|
// finally, we convert that in string using the type conversion. Using fmt would result in printing all runes independently
|
|
currentEmojiWithSkinTone.Emoji = string(append([]rune(currentEmojiWithSkinTone.Emoji), tone...))
|
|
|
|
// we adapt the description and metadata to match the skin tone
|
|
currentEmojiWithSkinTone.Description = fmt.Sprintf("%s %s", currentEmojiWithSkinTone.Description, skinToneName)
|
|
aliases := []string{}
|
|
for _, alias := range currentEmojiWithSkinTone.Aliases {
|
|
// we update all aliases to include the skin tone
|
|
aliases = append(aliases, fmt.Sprintf("%s_%s", alias, strings.ReplaceAll(strings.ReplaceAll(skinToneName, "-", "_"), " ", "_")))
|
|
}
|
|
currentEmojiWithSkinTone.Aliases = aliases
|
|
// I cleared the unicode version because some emoji with skin tone were added way after their original. I could parse the unicode list,
|
|
// but I'm a loafer, so I did not.
|
|
currentEmojiWithSkinTone.UnicodeVersion = ""
|
|
// we add the new emoji to the list
|
|
list = append(list, currentEmojiWithSkinTone)
|
|
}
|
|
}
|
|
}
|
|
return list
|
|
}
|