Files
oddmu/tokenizer_test.go
Alex Schroeder e1ba007f97 Don't link hashtags in link text
This change depends on a change to the markdown library. Specifically,
the parser's InsideLink must be public.

This means that the #like_this hashtag from the README.md in the
source directory is no longer available, so that test had to be
rewritten.

Another change to reduce the number of false hashtags was to use the
hashtag parser for all situations: It's also used to identify hashtags
in the search query string. The parser doesn't automatically turn the
matches to lower-case, however, so that has to be done when indexing
documents and when searching for hashtags.

The hashtags command for the command line no longer prints a hash
before each of the tags.
2025-02-07 20:05:36 +01:00

58 lines
1.8 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

package main
import (
"fmt"
"github.com/stretchr/testify/assert"
"testing"
)
func TestHashtags(t *testing.T) {
assert.EqualValues(t, []string{"Truth"}, hashtags([]byte("This is boring. #Truth")), "hashtags")
}
func TestEscapedHashtags(t *testing.T) {
assert.EqualValues(t, []string{}, hashtags([]byte("This is not a hashtag: \\#False")), "escaped hashtags")
}
func TestBorkedHashtags(t *testing.T) {
assert.EqualValues(t, []string{}, hashtags([]byte("This is borked: \\#")), "borked hashtag")
}
// TestTokensAndPredicates checks that a query string is split into
// plain search terms and predicate tokens like "title:bar".
func TestTokensAndPredicates(t *testing.T) {
	query := "foo title:bar"
	predicates, terms := predicatesAndTokens(query)
	assert.EqualValues(t, []string{"foo"}, terms)
	assert.EqualValues(t, []string{"title:bar"}, predicates)
}
// TestQuoteRunes checks that IsQuote recognizes ASCII, typographic,
// guillemet, and CJK quotation mark runes.
func TestQuoteRunes(t *testing.T) {
	s := `'"‘’‘‚“”„«»«‹›‹「」「」『』`
	// The loop variable is named r, not "rune": using "rune" would
	// shadow the predeclared rune type inside the loop body.
	for _, r := range s {
		assert.True(t, IsQuote(r), fmt.Sprintf("%c is a quote", r))
	}
}
// TestQuotes checks that tokenizeWithQuotes strips many kinds of paired
// quotation marks (ASCII, typographic, guillemets, CJK corner brackets)
// around single words. The input deliberately mixes ambiguous Unicode
// quote characters; do not retype it by hand.
// NOTE(review): the expected slice ends in two empty strings — the last
// input items apparently tokenize to "" rather than "foo"; presumably a
// known tokenizer quirk, confirm against tokenizeWithQuotes.
func TestQuotes(t *testing.T) {
s := `'foo' "foo" foo foo foo “foo” „foo“ ”foo” «foo» »foo« foo foo 「foo」
「foo」 『foo』`
tokens := tokenizeWithQuotes(s)
assert.EqualValues(t, []string{
"foo", "foo", "foo", "foo", "foo", "foo", "foo", "foo", "foo", "foo", "foo", "foo", "foo",
"", ""}, tokens)
}
// TestPhrases checks that a single-quoted phrase survives tokenization
// as one token instead of being split on whitespace.
func TestPhrases(t *testing.T) {
	tokens := tokenizeWithQuotes(`look for 'foo bar'`)
	expected := []string{"look", "for", "foo bar"}
	assert.EqualValues(t, expected, tokens)
}
// TestKlingon exercises tokenizeWithQuotes with Klingon text whose
// apostrophe-like characters look like quotes. The second assertion
// documents a known-wrong result rather than desired behavior.
// NOTE(review): the raw strings may contain non-ASCII apostrophe
// characters; do not retype them by hand.
func TestKlingon(t *testing.T) {
s := `quSDaq balua`
tokens := tokenizeWithQuotes(s)
assert.EqualValues(t, []string{"quSDaq", "balua"}, tokens)
// quotes at the beginning of a word are not handled correctly
s = `nuqDaq oH tache`
tokens = tokenizeWithQuotes(s)
assert.EqualValues(t, []string{"nuqDaq", "oH tach", "e"}, tokens) // this is wrong 🤷
}