forked from mirror/oddmu
Compare commits
4 Commits
full-text-
...
v0.5
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
da361284e8 | ||
|
|
6215d2a842 | ||
|
|
47c727c00d | ||
|
|
91381e474c |
10
README.md
10
README.md
@@ -421,15 +421,15 @@ A document with content "This is a test" when searched with the phrase
|
||||
"this test" therefore gets a score of 8: the entire phrase does not
|
||||
match but each word gets four points.
|
||||
|
||||
Trigrams are sometimes strange: In a text containing the words
|
||||
"software" and "#socialmedia", a search for "#software" returns a
|
||||
result because the trigram "#so" is part of "#socialmedia".
|
||||
Trigrams are sometimes strange: In a text containing the words "main"
|
||||
and "rail", a search for "mail" returns a match because the trigrams
|
||||
"mai" and "ail" are found. In this situation, the result has a score
|
||||
of 0.
|
||||
|
||||
## Limitations
|
||||
|
||||
Page titles are filenames with `.md` appended. If your filesystem
|
||||
cannot handle it, it can't be a page title. Specifically, *no slashes*
|
||||
in filenames.
|
||||
cannot handle it, it can't be a page name.
|
||||
|
||||
The pages are indexed as the server starts and the index is kept in
|
||||
memory. If you have a ton of pages, this surely wastes a lot of
|
||||
|
||||
8
TODO.md
8
TODO.md
@@ -9,15 +9,7 @@ Post by Delta Chat? That is, allow certain encrypted emails to post.
|
||||
|
||||
Convert the existing wiki.
|
||||
|
||||
Investigate how to run a multi-lingual wiki where an appropriate
|
||||
template is used based on the language of the page. This is important
|
||||
because the template needs to use the appropriate `lang` attribute for
|
||||
hyphenation to work.
|
||||
|
||||
Investigate how to run a multi-lingual wiki where an appropriate
|
||||
version of a page is served based on language preferences of the user.
|
||||
This is a low priority issue since it's probably only of interest for
|
||||
corporate or governmental sites.
|
||||
|
||||
Switch from trigram search to a simple full text search engine?
|
||||
https://artem.krylysov.com/blog/2020/07/28/lets-build-a-full-text-search-engine/
|
||||
|
||||
13
page.go
13
page.go
@@ -105,7 +105,7 @@ func (p *Page) handleTitle(replace bool) {
|
||||
func (p *Page) renderHtml() {
|
||||
maybeUnsafeHTML := markdown.ToHTML(p.Body, nil, nil)
|
||||
p.Html = sanitizeBytes(maybeUnsafeHTML)
|
||||
p.Language = p.language(p.plainText())
|
||||
p.Language = language(p.plainText())
|
||||
}
|
||||
|
||||
// plainText renders the Page.Body to plain text and returns it,
|
||||
@@ -141,17 +141,12 @@ func (p *Page) summarize(q string) {
|
||||
p.Score = score(q, string(p.Body)) + score(q, p.Title)
|
||||
t := p.plainText()
|
||||
p.Html = sanitize(snippets(q, t))
|
||||
p.Language = p.language(t)
|
||||
p.Language = language(t)
|
||||
}
|
||||
|
||||
func (p *Page) language (s string) string {
|
||||
func language(s string) string {
|
||||
if language, ok := detector.DetectLanguageOf(s); ok {
|
||||
switch language {
|
||||
case lingua.English:
|
||||
return "en"
|
||||
case lingua.German:
|
||||
return "de"
|
||||
}
|
||||
return strings.ToLower(language.IsoCode639_1().String())
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
11
page_test.go
11
page_test.go
@@ -76,3 +76,14 @@ Moonlight floods the aisle`)}
|
||||
_ = os.RemoveAll("testdata")
|
||||
})
|
||||
}
|
||||
|
||||
func TestLanguage(t *testing.T) {
|
||||
l := language(`
|
||||
My back hurts at night
|
||||
My shoulders won't budge today
|
||||
Winter bones I say`)
|
||||
if l != "en" {
|
||||
t.Logf("Language detected: %s", l)
|
||||
t.Fail()
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user