emoji-search-engine-go/main.go

169 lines
5.6 KiB
Go

package main
import (
"encoding/json"
"flag"
"fmt"
bleve "github.com/blevesearch/bleve/v2"
"github.com/go-zoox/fetch"
"github.com/sirupsen/logrus"
"os"
"strconv"
"strings"
)
// Package-level state written by indexEmojies and read by Search.
var (
// index is the bleve index used by Search. It is nil until indexEmojies has
// completed successfully; Search returns no results while it is nil.
index bleve.Index
// emojis holds every indexed emoji description. The element at position i is
// the document indexed under the decimal ID i, which is how Search maps a hit
// back to its description.
emojis []EmojiDescription
)
// usage writes the command-line synopsis and the defaults of all registered
// flags to standard error, then terminates the process with exit status 2.
// It never returns.
func usage() {
	const synopsis = "usage: go run main.go query\n"

	fmt.Fprint(os.Stderr, synopsis)
	flag.PrintDefaults()
	os.Exit(2)
}
// main parses the command line, builds the in-memory emoji index and prints
// one "<emoji> - <description>" line per emoji matching the query.
//
// All positional arguments are joined with single spaces to form the query.
// The process exits with status 2 when no query is given and with status 1
// when the index cannot be built.
func main() {
	flag.Usage = usage
	flag.Parse()

	args := flag.Args()
	if len(args) < 1 {
		fmt.Println("Query missing")
		// usage prints the synopsis and exits with status 2; nothing after
		// this call runs.
		usage()
	}

	if err := indexEmojies(); err != nil {
		// Search returns nothing while the index is missing, so continuing
		// would only print an empty result and exit successfully.
		logrus.WithError(err).Error("Could not index")
		os.Exit(1)
	}

	// Build the query from the parsed positional arguments rather than from
	// os.Args: flag.Parse consumes a leading "--" terminator, which would
	// otherwise end up inside the query string.
	for _, result := range Search(strings.Join(args, " ")) {
		fmt.Printf("%s - %s\n", result.Emoji, result.Description)
	}
}
// EmojiDescription is a single emoji record. Its JSON tags match the keys the
// program decodes from the downloaded emoji list, and the same type is what
// gets indexed and returned by Search.
type EmojiDescription struct {
	// Emoji is the emoji character sequence itself.
	Emoji string `json:"emoji"`
	// Description is the human-readable name printed next to the emoji.
	Description string `json:"description"`
	// Category is the group the emoji is listed under.
	Category string `json:"category"`
	// Aliases are the short names of the emoji.
	Aliases []string `json:"aliases"`
	// Tags are additional keywords attached to the emoji.
	Tags []string `json:"tags"`
	// HasSkinTones reports whether skin tone variations should be generated
	// for this emoji. It is omitted from encoded JSON when false.
	HasSkinTones bool `json:"skin_tones,omitempty"`
	// UnicodeVersion is the Unicode version string of the record. It is
	// cleared on generated skin tone variations.
	UnicodeVersion string `json:"unicode_version"`
}
// GithubDescriptionResponse is a named type for a list of EmojiDescription
// values.
// NOTE(review): presumably intended for the decoded emoji list payload, but
// nothing in the visible code refers to it — confirm before relying on or
// removing it.
type GithubDescriptionResponse []EmojiDescription
// fetchEmojiFromGithub downloads the gemoji emoji list from GitHub and decodes
// it into a slice of EmojiDescription.
//
// On failure it returns a nil slice together with an error that says which
// step failed (download or JSON decoding) and wraps the underlying cause.
func fetchEmojiFromGithub() (results []EmojiDescription, err error) {
	const emojiListURL = "https://raw.githubusercontent.com/github/gemoji/master/db/emoji.json"

	response, err := fetch.Get(emojiListURL)
	if err != nil {
		return nil, fmt.Errorf("downloading %s: %w", emojiListURL, err)
	}

	if err := json.Unmarshal(response.Body, &results); err != nil {
		// Unmarshal may have filled part of the slice before failing; do not
		// hand a half-decoded list to the caller.
		return nil, fmt.Errorf("decoding emoji list from %s: %w", emojiListURL, err)
	}
	return results, nil
}
func indexEmojies() error {
// we create a new indexMaping. I used the default one that will index all fields of my EmojiDescription
mapping := bleve.NewIndexMapping()
// we create the index instance
bleveIndex, err := bleve.NewMemOnly(mapping)
if err != nil {
return err
}
// we fetch the emoji from the internet. This can fail, and may be embeded for better performance
e, err := fetchEmojiFromGithub()
if err != nil {
logrus.WithError(err).Error("Could fetch emoji list")
return err
}
emojis = enhanceEmojiListWithVariations(e)
for eNumber, eDescription := range emojis {
// this will index each item one by one. No need to be quick here for me, I can wait few ms for the program to start.
err = bleveIndex.Index(fmt.Sprintf("%d", eNumber), eDescription)
if err != nil {
logrus.WithError(err).Error("Could not index an emoji")
}
}
index = bleveIndex // we make the index available
return nil
}
// Search looks q up in the package-level bleve index and returns the matching
// emoji descriptions in the order bleve reports the hits.
//
// q is first run as a bleve query-string query. Only when that produces no hit
// at all is q retried as a single fuzzy term. At most 200 hits are requested
// per query.
//
// Search returns nil when the index has not been built yet, when bleve reports
// an error (the error is logged), or when nothing matches.
func Search(q string) (results []EmojiDescription) {
	const maxHits = 200

	if index == nil {
		// indexEmojies has not been called yet or did not succeed.
		return nil
	}

	found, err := index.Search(bleve.NewSearchRequestOptions(bleve.NewQueryStringQuery(q), maxHits, 0, false))
	if err != nil {
		logrus.WithError(err).Error("Could not search for an emoji")
		return nil
	}

	// Nothing matched exactly: fall back to a basic fuzzy search.
	if len(found.Hits) == 0 {
		found, err = index.Search(bleve.NewSearchRequestOptions(bleve.NewFuzzyQuery(q), maxHits, 0, false))
		if err != nil {
			logrus.WithError(err).Error("Could not search for emoji")
			return nil
		}
	}

	// Document IDs are positions in emojis, so a hit is resolved by parsing
	// its ID. An ID that is not a number or lies outside the list is skipped
	// instead of silently resolving to element 0 or panicking.
	for _, hit := range found.Hits {
		position, err := strconv.Atoi(hit.ID)
		if err != nil || position < 0 || position >= len(emojis) {
			logrus.WithField("id", hit.ID).Warn("Search hit does not map to a known emoji")
			continue
		}
		results = append(results, emojis[position])
	}
	return results
}
// enhanceEmojiListWithVariations returns a new slice holding every entry of
// list followed by the skin tone variations of each entry whose HasSkinTones
// flag is set.
//
// Variations are appended in input order, and for each emoji in the fixed
// order light, medium-light, medium, medium-dark, dark, so the result (and any
// ID derived from a position in it) is the same on every run. For each
// variation:
//   - Emoji carries the skin tone modifier (see applySkinTone);
//   - Description is the original description, a space and the tone name;
//   - every alias gets the tone name appended as a suffix with spaces and
//     dashes turned into underscores, e.g. "wave_medium_light_skin_tone";
//   - UnicodeVersion is cleared, because a toned variant may have been
//     introduced in a later Unicode version than its base emoji.
//
// The input slice and its backing array are left untouched.
func enhanceEmojiListWithVariations(list []EmojiDescription) []EmojiDescription {
	// An ordered table rather than a map: map iteration order is random in Go
	// and would shuffle the variations between runs.
	tones := []struct {
		name     string
		modifier rune
	}{
		{"light skin tone", '\U0001F3FB'},
		{"medium-light skin tone", '\U0001F3FC'},
		{"medium skin tone", '\U0001F3FD'},
		{"medium-dark skin tone", '\U0001F3FE'},
		{"dark skin tone", '\U0001F3FF'},
	}

	// The alias suffix depends only on the tone, so compute it once per tone.
	underscored := strings.NewReplacer("-", "_", " ", "_")
	suffixes := make([]string, len(tones))
	for i, tone := range tones {
		suffixes[i] = "_" + underscored.Replace(tone.name)
	}

	withTones := 0
	for i := range list {
		if list[i].HasSkinTones {
			withTones++
		}
	}

	// Copy into a fresh, correctly sized slice so that appending can never
	// write into spare capacity of the caller's backing array.
	enhanced := make([]EmojiDescription, len(list), len(list)+withTones*len(tones))
	copy(enhanced, list)

	for i := range list {
		if !list[i].HasSkinTones {
			continue
		}
		for t, tone := range tones {
			variant := list[i]
			variant.Emoji = applySkinTone(variant.Emoji, tone.modifier)
			variant.Description = fmt.Sprintf("%s %s", variant.Description, tone.name)

			// Always a non-nil slice, even when the base emoji has no alias.
			aliases := make([]string, 0, len(variant.Aliases))
			for _, alias := range variant.Aliases {
				aliases = append(aliases, alias+suffixes[t])
			}
			variant.Aliases = aliases
			variant.UnicodeVersion = ""

			enhanced = append(enhanced, variant)
		}
	}
	return enhanced
}

// applySkinTone returns emoji with the skin tone modifier inserted directly
// after its first element, i.e. before the first zero-width joiner or, when
// there is none, at the end.
//
// A modifier only combines with the character it immediately follows, so a
// variation selector (U+FE0F) sitting at the insertion point is dropped:
// U+261D U+FE0F becomes U+261D <modifier>, and a joined sequence such as
// U+1F575 U+FE0F U+200D U+2642 U+FE0F becomes
// U+1F575 <modifier> U+200D U+2642 U+FE0F. "People holding hands" shows two
// persons, so the modifier is applied to both of them.
func applySkinTone(emoji string, modifier rune) string {
	const (
		variationSelector16 = "\uFE0F"
		zeroWidthJoiner     = "\u200D"
		peopleHoldingHands  = "\U0001F9D1\u200D\U0001F91D\u200D\U0001F9D1"
	)

	tone := string(modifier)

	head, tail := emoji, ""
	if i := strings.Index(emoji, zeroWidthJoiner); i >= 0 {
		head, tail = emoji[:i], emoji[i:]
	}
	head = strings.TrimSuffix(head, variationSelector16)

	if emoji == peopleHoldingHands {
		return head + tone + tail + tone
	}
	return head + tone + tail
}