This commit is contained in:
ztec 2023-04-02 02:51:20 +02:00
commit 940759a874
3 changed files with 295 additions and 0 deletions

43
go.mod Normal file
View File

@ -0,0 +1,43 @@
module git2.riper.fr/ztec/emoji-search-engine-go
go 1.20
require (
github.com/blevesearch/bleve/v2 v2.3.7
github.com/go-zoox/fetch v1.7.6
github.com/sirupsen/logrus v1.9.0
)
require (
github.com/RoaringBitmap/roaring v0.9.4 // indirect
github.com/bits-and-blooms/bitset v1.2.0 // indirect
github.com/blevesearch/bleve_index_api v1.0.5 // indirect
github.com/blevesearch/geo v0.1.17 // indirect
github.com/blevesearch/go-porterstemmer v1.0.3 // indirect
github.com/blevesearch/gtreap v0.1.1 // indirect
github.com/blevesearch/mmap-go v1.0.4 // indirect
github.com/blevesearch/scorch_segment_api/v2 v2.1.4 // indirect
github.com/blevesearch/segment v0.9.1 // indirect
github.com/blevesearch/snowballstem v0.9.0 // indirect
github.com/blevesearch/upsidedown_store_api v1.0.2 // indirect
github.com/blevesearch/vellum v1.0.9 // indirect
github.com/blevesearch/zapx/v11 v11.3.7 // indirect
github.com/blevesearch/zapx/v12 v12.3.7 // indirect
github.com/blevesearch/zapx/v13 v13.3.7 // indirect
github.com/blevesearch/zapx/v14 v14.3.7 // indirect
github.com/blevesearch/zapx/v15 v15.3.9 // indirect
github.com/go-zoox/core-utils v1.0.13 // indirect
github.com/go-zoox/headers v1.0.4 // indirect
github.com/golang/geo v0.0.0-20210211234256-740aa86cb551 // indirect
github.com/golang/protobuf v1.3.2 // indirect
github.com/golang/snappy v0.0.1 // indirect
github.com/json-iterator/go v0.0.0-20171115153421-f7279a603ede // indirect
github.com/mschoch/smat v0.2.0 // indirect
github.com/tidwall/gjson v1.14.0 // indirect
github.com/tidwall/match v1.1.1 // indirect
github.com/tidwall/pretty v1.2.0 // indirect
go.etcd.io/bbolt v1.3.5 // indirect
golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3 // indirect
golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
)

84
go.sum Normal file
View File

@ -0,0 +1,84 @@
github.com/RoaringBitmap/roaring v0.9.4 h1:ckvZSX5gwCRaJYBNe7syNawCU5oruY9gQmjXlp4riwo=
github.com/RoaringBitmap/roaring v0.9.4/go.mod h1:icnadbWcNyfEHlYdr+tDlOTih1Bf/h+rzPpv4sbomAA=
github.com/bits-and-blooms/bitset v1.2.0 h1:Kn4yilvwNtMACtf1eYDlG8H77R07mZSPbMjLyS07ChA=
github.com/bits-and-blooms/bitset v1.2.0/go.mod h1:gIdJ4wp64HaoK2YrL1Q5/N7Y16edYb8uY+O0FJTyyDA=
github.com/blevesearch/bleve/v2 v2.3.7 h1:nIfIrhv28tvgBpbVF8Dq7/U1zW/YiwSqg/PBgE3x8bo=
github.com/blevesearch/bleve/v2 v2.3.7/go.mod h1:2tToYD6mDeseIA13jcZiEEqYrVLg6xdk0v6+F7dWquU=
github.com/blevesearch/bleve_index_api v1.0.5 h1:Lc986kpC4Z0/n1g3gg8ul7H+lxgOQPcXb9SxvQGu+tw=
github.com/blevesearch/bleve_index_api v1.0.5/go.mod h1:YXMDwaXFFXwncRS8UobWs7nvo0DmusriM1nztTlj1ms=
github.com/blevesearch/geo v0.1.17 h1:AguzI6/5mHXapzB0gE9IKWo+wWPHZmXZoscHcjFgAFA=
github.com/blevesearch/geo v0.1.17/go.mod h1:uRMGWG0HJYfWfFJpK3zTdnnr1K+ksZTuWKhXeSokfnM=
github.com/blevesearch/go-porterstemmer v1.0.3 h1:GtmsqID0aZdCSNiY8SkuPJ12pD4jI+DdXTAn4YRcHCo=
github.com/blevesearch/go-porterstemmer v1.0.3/go.mod h1:angGc5Ht+k2xhJdZi511LtmxuEf0OVpvUUNrwmM1P7M=
github.com/blevesearch/gtreap v0.1.1 h1:2JWigFrzDMR+42WGIN/V2p0cUvn4UP3C4Q5nmaZGW8Y=
github.com/blevesearch/gtreap v0.1.1/go.mod h1:QaQyDRAT51sotthUWAH4Sj08awFSSWzgYICSZ3w0tYk=
github.com/blevesearch/mmap-go v1.0.4 h1:OVhDhT5B/M1HNPpYPBKIEJaD0F3Si+CrEKULGCDPWmc=
github.com/blevesearch/mmap-go v1.0.4/go.mod h1:EWmEAOmdAS9z/pi/+Toxu99DnsbhG1TIxUoRmJw/pSs=
github.com/blevesearch/scorch_segment_api/v2 v2.1.4 h1:LmGmo5twU3gV+natJbKmOktS9eMhokPGKWuR+jX84vk=
github.com/blevesearch/scorch_segment_api/v2 v2.1.4/go.mod h1:PgVnbbg/t1UkgezPDu8EHLi1BHQ17xUwsFdU6NnOYS0=
github.com/blevesearch/segment v0.9.1 h1:+dThDy+Lvgj5JMxhmOVlgFfkUtZV2kw49xax4+jTfSU=
github.com/blevesearch/segment v0.9.1/go.mod h1:zN21iLm7+GnBHWTao9I+Au/7MBiL8pPFtJBJTsk6kQw=
github.com/blevesearch/snowballstem v0.9.0 h1:lMQ189YspGP6sXvZQ4WZ+MLawfV8wOmPoD/iWeNXm8s=
github.com/blevesearch/snowballstem v0.9.0/go.mod h1:PivSj3JMc8WuaFkTSRDW2SlrulNWPl4ABg1tC/hlgLs=
github.com/blevesearch/upsidedown_store_api v1.0.2 h1:U53Q6YoWEARVLd1OYNc9kvhBMGZzVrdmaozG2MfoB+A=
github.com/blevesearch/upsidedown_store_api v1.0.2/go.mod h1:M01mh3Gpfy56Ps/UXHjEO/knbqyQ1Oamg8If49gRwrQ=
github.com/blevesearch/vellum v1.0.9 h1:PL+NWVk3dDGPCV0hoDu9XLLJgqU4E5s/dOeEJByQ2uQ=
github.com/blevesearch/vellum v1.0.9/go.mod h1:ul1oT0FhSMDIExNjIxHqJoGpVrBpKCdgDQNxfqgJt7k=
github.com/blevesearch/zapx/v11 v11.3.7 h1:Y6yIAF/DVPiqZUA/jNgSLXmqewfzwHzuwfKyfdG+Xaw=
github.com/blevesearch/zapx/v11 v11.3.7/go.mod h1:Xk9Z69AoAWIOvWudNDMlxJDqSYGf90LS0EfnaAIvXCA=
github.com/blevesearch/zapx/v12 v12.3.7 h1:DfQ6rsmZfEK4PzzJJRXjiM6AObG02+HWvprlXQ1Y7eI=
github.com/blevesearch/zapx/v12 v12.3.7/go.mod h1:SgEtYIBGvM0mgIBn2/tQE/5SdrPXaJUaT/kVqpAPxm0=
github.com/blevesearch/zapx/v13 v13.3.7 h1:igIQg5eKmjw168I7av0Vtwedf7kHnQro/M+ubM4d2l8=
github.com/blevesearch/zapx/v13 v13.3.7/go.mod h1:yyrB4kJ0OT75UPZwT/zS+Ru0/jYKorCOOSY5dBzAy+s=
github.com/blevesearch/zapx/v14 v14.3.7 h1:gfe+fbWslDWP/evHLtp/GOvmNM3sw1BbqD7LhycBX20=
github.com/blevesearch/zapx/v14 v14.3.7/go.mod h1:9J/RbOkqZ1KSjmkOes03AkETX7hrXT0sFMpWH4ewC4w=
github.com/blevesearch/zapx/v15 v15.3.9 h1:/s9zqKxFaZKQTTcMO2b/Tup0ch5MSztlvw+frVDfIBk=
github.com/blevesearch/zapx/v15 v15.3.9/go.mod h1:m7Y6m8soYUvS7MjN9eKlz1xrLCcmqfFadmu7GhWIrLY=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/go-zoox/core-utils v1.0.13 h1:aI20hoVvPjVfJSgy01XUliEdIvvMmdlvEcrXBg7Pbkc=
github.com/go-zoox/core-utils v1.0.13/go.mod h1:EknM3KLL6/kagL95wZbWE7mRRcYCG/fHqiJ/EH0ihAs=
github.com/go-zoox/fetch v1.7.6 h1:bBW/RUJmhWUJ4jHtUAybZYcnNX8KQZbLUZ/aUI4R9Cw=
github.com/go-zoox/fetch v1.7.6/go.mod h1:0c3KKgCARMZ2IaZo6SvaXjQeQV/SEz+Hn9u7V1Sd7gk=
github.com/go-zoox/headers v1.0.4 h1:dkAUngO2bp7L28VZ80t1IezwOB8xkFfNKJ2IhpZY/2Y=
github.com/go-zoox/headers v1.0.4/go.mod h1:WEgEbewswEw4n4qS1iG68Kn/vOQVCAKGwwuZankc6so=
github.com/golang/geo v0.0.0-20210211234256-740aa86cb551 h1:gtexQ/VGyN+VVFRXSFiguSNcXmS6rkKT+X7FdIrTtfo=
github.com/golang/geo v0.0.0-20210211234256-740aa86cb551/go.mod h1:QZ0nwyI2jOfgRAoBvP+ab5aRr7c9x7lhGEJrKvBwjWI=
github.com/golang/protobuf v1.3.2 h1:6nsPYzhq5kReh6QImI3k5qWzO4PEbvbIW2cwSfR/6xs=
github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
github.com/golang/snappy v0.0.1 h1:Qgr9rKW7uDUkrbSmQeiDsGa8SjGyCOGtuasMWwvp2P4=
github.com/golang/snappy v0.0.1/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
github.com/json-iterator/go v0.0.0-20171115153421-f7279a603ede h1:YrgBGwxMRK0Vq0WSCWFaZUnTsrA/PZE/xs1QZh+/edg=
github.com/json-iterator/go v0.0.0-20171115153421-f7279a603ede/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCVDaaPEHmU=
github.com/mschoch/smat v0.2.0 h1:8imxQsjDm8yFEAVBe7azKmKSgzSkZXDuKkSq9374khM=
github.com/mschoch/smat v0.2.0/go.mod h1:kc9mz7DoBKqDyiRL7VZN8KvXQMWeTaVnttLRXOlotKw=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/sirupsen/logrus v1.9.0 h1:trlNQbNUG3OdDrDil03MCb1H2o9nJ1x4/5LYw7byDE0=
github.com/sirupsen/logrus v1.9.0/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4=
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/tidwall/gjson v1.14.0 h1:6aeJ0bzojgWLa82gDQHcx3S0Lr/O51I9bJ5nv6JFx5w=
github.com/tidwall/gjson v1.14.0/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk=
github.com/tidwall/match v1.1.1 h1:+Ho715JplO36QYgwN9PGYNhgZvoUSc9X2c80KVTi+GA=
github.com/tidwall/match v1.1.1/go.mod h1:eRSPERbgtNPcGhD8UCthc6PmLEQXEWd3PRB5JTxsfmM=
github.com/tidwall/pretty v1.2.0 h1:RWIZEg2iJ8/g6fDDYzMpobmaoGh5OLl4AXtGUGPcqCs=
github.com/tidwall/pretty v1.2.0/go.mod h1:ITEVvHYasfjBbM0u2Pg8T2nJnzm8xPwvNhhsoaGGjNU=
go.etcd.io/bbolt v1.3.5 h1:XAzx9gjCb0Rxj7EoqcClPD1d5ZBxZJk0jbuoPHenBt0=
go.etcd.io/bbolt v1.3.5/go.mod h1:G5EMThwa9y8QZGBClrRx5EY+Yw9kAhnjy3bSjsnlVTQ=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3 h1:0GoQqolDA55aaLxZyTzK/Y2ePZzZTUrRacwib7cNsYQ=
golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20200202164722-d101bd2416d5/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8 h1:0A+M6Uqn+Eje4kHMK80dtF3JCXC4ykBgQG4Fe06QRhQ=
golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f h1:v4INt8xihDGvnrfjMDVXGxw9wrfxYyCjk0KbXjhR55s=
golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=

168
main.go Normal file
View File

@ -0,0 +1,168 @@
package main
import (
"encoding/json"
"flag"
"fmt"
bleve "github.com/blevesearch/bleve/v2"
"github.com/go-zoox/fetch"
"github.com/sirupsen/logrus"
"os"
"strconv"
"strings"
)
var (
index bleve.Index
emojis []EmojiDescription
)
func usage() {
fmt.Fprintf(os.Stderr, "usage: go run main.go query\n")
flag.PrintDefaults()
os.Exit(2)
}
func main() {
flag.Usage = usage
flag.Parse()
args := flag.Args()
if len(args) < 1 {
fmt.Println("Query missing")
usage()
os.Exit(1)
}
err := indexEmojies()
if err != nil {
logrus.WithError(err).Error("Could not index")
}
results := Search(strings.Join(os.Args[1:], " "))
for _, result := range results {
fmt.Printf("%s - %s\n", result.Emoji, result.Description)
}
}
type EmojiDescription struct {
Emoji string `json:"emoji"`
Description string `json:"description"`
Category string `json:"category"`
Aliases []string `json:"aliases"`
Tags []string `json:"tags"`
HasSkinTones bool `json:"skin_tones,omitempty"`
UnicodeVersion string `json:"unicode_version"`
}
type GithubDescriptionResponse []EmojiDescription
func fetchEmojiFromGithub() (results []EmojiDescription, err error) {
response, err := fetch.Get("https://raw.githubusercontent.com/github/gemoji/master/db/emoji.json")
if err != nil {
return
}
err = json.Unmarshal(response.Body, &results)
return
}
func indexEmojies() error {
// we create a new indexMaping. I used the default one that will index all fields of my EmojiDescription
mapping := bleve.NewIndexMapping()
// we create the index instance
bleveIndex, err := bleve.NewMemOnly(mapping)
if err != nil {
return err
}
// we fetch the emoji from the internet. This can fail, and may be embeded for better performance
e, err := fetchEmojiFromGithub()
if err != nil {
logrus.WithError(err).Error("Could fetch emoji list")
return err
}
emojis = enhanceEmojiListWithVariations(e)
for eNumber, eDescription := range emojis {
// this will index each item one by one. No need to be quick here for me, I can wait few ms for the program to start.
err = bleveIndex.Index(fmt.Sprintf("%d", eNumber), eDescription)
if err != nil {
logrus.WithError(err).Error("Could not index an emoji")
}
}
index = bleveIndex // we make the index available
return nil
}
func Search(q string) (results []EmojiDescription) {
if index == nil {
// no Index mean indexEmojies was not called yet or did not finished. No results (boot process)
return
}
// we create a query as bleve expect.
query := bleve.NewQueryStringQuery(q)
// we define the search options and limit to 200 results. This should be enough.
searchrequest := bleve.NewSearchRequestOptions(query, 200, 0, false)
// we do the search itself. This is the longest. Approximately few hundreds of us
searchresults, err := index.Search(searchrequest)
if err != nil {
logrus.WithError(err).Error("Could not search for an emoji")
return
}
// If we have no results we try to do a basic fuzzy search
if len(searchresults.Hits) == 0 {
// this time, we create a fuzzy query. The rest is the same as before. CopyPasta style.
fuzzyQuery := bleve.NewFuzzyQuery(q)
searchrequest := bleve.NewSearchRequestOptions(fuzzyQuery, 200, 0, false)
searchresults, err = index.Search(searchrequest)
if err != nil {
logrus.WithError(err).Error("Could not search for emoji")
return
}
}
// we return the results. I use the index to find my original object stored in `emojis` because it's simpler. Optimisation possible.
for _, result := range searchresults.Hits {
numIndex, _ := strconv.ParseInt(result.ID, 10, 64)
results = append(results, emojis[numIndex])
}
return
}
func enhanceEmojiListWithVariations(list []EmojiDescription) []EmojiDescription {
tones := map[string][]rune{
"light skin tone": []rune("\U0001F3FB"),
"medium-light skin tone": []rune("\U0001F3FC"),
"medium skin tone": []rune("\U0001F3FD"),
"medium-dark skin tone": []rune("\U0001F3FE"),
"dark skin tone": []rune("\U0001F3FF"),
}
for _, originalEmoji := range list {
// we only add variations for emoji that supports it
if originalEmoji.HasSkinTones {
// we do it for every skin tone
for skinToneName, tone := range tones {
// we make a copy of the emojiDescription
currentEmojiWithSkinTone := originalEmoji
// This is the important bit that took me hours to figure out
// we convert the emoji in rune (string -> []rune). An emoji can already be composed of multiple sub UTF8 characters, therefore multiple runes.
// we append to the list of runes the one for the skin tone.
// finally, we convert that in string using the type conversion. Using fmt would result in printing all runes independently
currentEmojiWithSkinTone.Emoji = string(append([]rune(currentEmojiWithSkinTone.Emoji), tone...))
// we adapt the description and metadata to match the skin tone
currentEmojiWithSkinTone.Description = fmt.Sprintf("%s %s", currentEmojiWithSkinTone.Description, skinToneName)
aliases := []string{}
for _, alias := range currentEmojiWithSkinTone.Aliases {
// we update all aliases to include the skin tone
aliases = append(aliases, fmt.Sprintf("%s_%s", alias, strings.ReplaceAll(strings.ReplaceAll(skinToneName, "-", "_"), " ", "_")))
}
currentEmojiWithSkinTone.Aliases = aliases
// I cleared the unicode version because some emoji with skin tone were added way after their original. I could parse the unicode list,
// but I'm a loafer, so I did not.
currentEmojiWithSkinTone.UnicodeVersion = ""
// we add the new emoji to the list
list = append(list, currentEmojiWithSkinTone)
}
}
}
return list
}