forked from mirror/oddmu
Compare commits
3 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
9ee2af6093 | ||
|
|
153a179d92 | ||
|
|
d9797aac75 |
24
README.md
24
README.md
@@ -119,7 +119,13 @@ is a byte array and that's why we need to call `printf`).
|
||||
|
||||
For the `search.html` template only:
|
||||
|
||||
`{{.Results}}` indicates if there were any search results.
|
||||
`{{.Previous}}`, `{{.Page}}`, `{{.Next}}` and `{{.Last}}` are the
|
||||
previous, current, next and last page number in the results since
|
||||
doing arithmetic in templates is hard. The first page number is 1.
|
||||
|
||||
`{{.More}}` indicates if there are any more search results.
|
||||
|
||||
`{{.Results}}` indicates if there were any search results at all.
|
||||
|
||||
`{{.Items}}` is an array of pages, each containing a search result. A
|
||||
search result is a page (with the properties seen above). Thus, to
|
||||
@@ -433,6 +439,22 @@ and "rail", a search for "mail" returns a match because the trigrams
|
||||
"mai" and "ail" are found. In this situation, the result has a score
|
||||
of 0.
|
||||
|
||||
The sorting of all the pages, however, does not depend on scoring!
|
||||
Computing the score is expensive because the page must be loaded from
|
||||
disk. Therefore, results are sorted by title:
|
||||
|
||||
- If the page title contains the query string, it gets sorted first.
|
||||
- If the page title begins with a number, it is sorted descending.
|
||||
- All other pages follow, sorted ascending.
|
||||
|
||||
The effect is that first, the pages with matches in the page title are
|
||||
shown, and then all the others. Within these two groups, the most
|
||||
recent blog posts are shown first, if and only if the page title
|
||||
begins with an ISO date like 2023-09-16.
|
||||
|
||||
The score and the highlighting of snippets are used to help visitors decide
|
||||
which links to click.
|
||||
|
||||
## Limitations
|
||||
|
||||
Page titles are filenames with `.md` appended. If your filesystem
|
||||
|
||||
@@ -17,11 +17,14 @@ func commands() {
|
||||
} else if len(os.Args) > 2 && os.Args[1] == "search" {
|
||||
index.load()
|
||||
for _, q := range os.Args[2:] {
|
||||
items := search(q)
|
||||
items, more, _ := search(q, 1)
|
||||
fmt.Printf("Search %s: %d results\n", q, len(items))
|
||||
for _, p := range items {
|
||||
fmt.Printf("* %s (%d)\n", p.Title, p.Score)
|
||||
}
|
||||
if more {
|
||||
fmt.Printf("There are more results\n")
|
||||
}
|
||||
}
|
||||
} else {
|
||||
fmt.Printf("Unknown command: %v\n", os.Args[1:])
|
||||
|
||||
@@ -10,6 +10,6 @@ func TestLoadAndSearch(t *testing.T) {
|
||||
index.reset()
|
||||
go index.load()
|
||||
q := "Oddµ"
|
||||
pages := search(q)
|
||||
pages, _, _ := search(q, 1)
|
||||
assert.Zero(t, len(pages))
|
||||
}
|
||||
|
||||
15
index.go
15
index.go
@@ -21,6 +21,9 @@ type Index struct {
|
||||
// documents is a map, mapping document ids of the index to
|
||||
// page names.
|
||||
documents map[trigram.DocID]string
|
||||
|
||||
// titles is a map, mapping page names to titles.
|
||||
titles map[string]string
|
||||
}
|
||||
|
||||
// index is the global Index per wiki.
|
||||
@@ -30,6 +33,7 @@ var index Index
|
||||
func (idx *Index) reset() {
|
||||
idx.index = nil
|
||||
idx.documents = nil
|
||||
idx.titles = nil
|
||||
}
|
||||
|
||||
// add reads a file and adds it to the index. This must happen while
|
||||
@@ -47,8 +51,10 @@ func (idx *Index) add(path string, info fs.FileInfo, err error) error {
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
p.handleTitle(false)
|
||||
id := idx.index.Add(strings.ToLower(string(p.Body)))
|
||||
idx.documents[id] = p.Name
|
||||
idx.titles[p.Name] = p.Title
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -59,6 +65,7 @@ func (idx *Index) load() (int, error) {
|
||||
defer idx.Unlock()
|
||||
idx.index = make(trigram.Index)
|
||||
idx.documents = make(map[trigram.DocID]string)
|
||||
idx.titles = make(map[string]string)
|
||||
err := filepath.Walk(".", idx.add)
|
||||
if err != nil {
|
||||
idx.reset()
|
||||
@@ -90,23 +97,27 @@ func (p *Page) updateIndex() {
|
||||
o, err := loadPage(p.Name)
|
||||
if err == nil {
|
||||
index.index.Delete(strings.ToLower(string(o.Body)), id)
|
||||
o.handleTitle(false)
|
||||
delete(index.titles, o.Title)
|
||||
}
|
||||
index.index.Insert(strings.ToLower(string(p.Body)), id)
|
||||
p.handleTitle(false)
|
||||
index.titles[p.Name] = p.Title
|
||||
}
|
||||
}
|
||||
|
||||
// searchDocuments searches the index for a string. This requires the
|
||||
// index to be locked.
|
||||
func searchDocuments(q string) []string {
|
||||
words := strings.Fields(strings.ToLower(q))
|
||||
var trigrams []trigram.T
|
||||
for _, word := range words {
|
||||
trigrams = trigram.Extract(word, trigrams)
|
||||
}
|
||||
index.RLock()
|
||||
ids := index.index.QueryTrigrams(trigrams)
|
||||
names := make([]string, len(ids))
|
||||
for i, id := range ids {
|
||||
names[i] = index.documents[id]
|
||||
}
|
||||
index.RUnlock()
|
||||
return names
|
||||
}
|
||||
|
||||
@@ -11,19 +11,19 @@ import (
|
||||
func TestIndex(t *testing.T) {
|
||||
index.load()
|
||||
q := "Oddµ"
|
||||
pages := search(q)
|
||||
pages, _, _ := search(q, 1)
|
||||
assert.NotZero(t, len(pages))
|
||||
for _, p := range pages {
|
||||
assert.NotContains(t, p.Title, "<b>")
|
||||
assert.True(t, strings.Contains(string(p.Body), q) || strings.Contains(string(p.Title), q))
|
||||
assert.NotZero(t, p.Score)
|
||||
assert.NotZero(t, p.Score, "Score %d for %s", p.Score, p.Name)
|
||||
}
|
||||
}
|
||||
|
||||
func TestSearchHashtag(t *testing.T) {
|
||||
index.load()
|
||||
q := "#Another_Tag"
|
||||
pages := search(q)
|
||||
pages, _, _ := search(q, 1)
|
||||
assert.NotZero(t, len(pages))
|
||||
}
|
||||
|
||||
@@ -35,7 +35,7 @@ func TestIndexUpdates(t *testing.T) {
|
||||
p.save()
|
||||
|
||||
// Find the phrase
|
||||
pages := search("This is a test")
|
||||
pages, _, _ := search("This is a test", 1)
|
||||
found := false
|
||||
for _, p := range pages {
|
||||
if p.Name == name {
|
||||
@@ -46,7 +46,7 @@ func TestIndexUpdates(t *testing.T) {
|
||||
assert.True(t, found)
|
||||
|
||||
// Find the phrase, case insensitive
|
||||
pages = search("this is a test")
|
||||
pages, _, _ = search("this is a test", 1)
|
||||
found = false
|
||||
for _, p := range pages {
|
||||
if p.Name == name {
|
||||
@@ -57,7 +57,7 @@ func TestIndexUpdates(t *testing.T) {
|
||||
assert.True(t, found)
|
||||
|
||||
// Find some words
|
||||
pages = search("this test")
|
||||
pages, _, _ = search("this test", 1)
|
||||
found = false
|
||||
for _, p := range pages {
|
||||
if p.Name == name {
|
||||
@@ -70,7 +70,7 @@ func TestIndexUpdates(t *testing.T) {
|
||||
// Update the page and no longer find it with the old phrase
|
||||
p = &Page{Name: name, Body: []byte("Guvf vf n grfg.")}
|
||||
p.save()
|
||||
pages = search("This is a test")
|
||||
pages, _, _ = search("This is a test", 1)
|
||||
found = false
|
||||
for _, p := range pages {
|
||||
if p.Name == name {
|
||||
@@ -81,7 +81,7 @@ func TestIndexUpdates(t *testing.T) {
|
||||
assert.False(t, found)
|
||||
|
||||
// Find page using a new word
|
||||
pages = search("Guvf")
|
||||
pages, _, _ = search("Guvf", 1)
|
||||
found = false
|
||||
for _, p := range pages {
|
||||
if p.Name == name {
|
||||
|
||||
8
page.go
8
page.go
@@ -186,10 +186,14 @@ func (p *Page) plainText() string {
|
||||
return string(text)
|
||||
}
|
||||
|
||||
// summarize for query string q sets Page.Html to an extract.
|
||||
func (p *Page) summarize(q string) {
|
||||
// score sets Page.Title and computes Page.Score.
|
||||
func (p *Page) score(q string) {
|
||||
p.handleTitle(true)
|
||||
p.Score = score(q, string(p.Body)) + score(q, p.Title)
|
||||
}
|
||||
|
||||
// summarize sets Page.Html to an extract and sets Page.Language.
|
||||
func (p *Page) summarize(q string) {
|
||||
t := p.plainText()
|
||||
p.Html = sanitize(snippets(q, t))
|
||||
p.Language = language(t)
|
||||
|
||||
@@ -94,7 +94,7 @@ func TestScorePageAndMarkup(t *testing.T) {
|
||||
s := `The Transjovian Council accepts new members. If you think we'd be a good fit, apply for an account. Contact [Alex Schroeder](https://alexschroeder.ch/wiki/Contact). Mail is best. Encrypted mail is best. [Delta Chat](https://delta.chat/de/) is a messenger app that uses encrypted mail. It's the bestest best.`
|
||||
p := &Page{Title: "Test", Name: "Test", Body: []byte(s)}
|
||||
q := "wiki"
|
||||
p.summarize(q)
|
||||
p.score(q)
|
||||
// "wiki" is not visible in the plain text but the score is not affected:
|
||||
// - wiki, all, whole, beginning, end (5)
|
||||
if p.Score != 5 {
|
||||
|
||||
115
search.go
115
search.go
@@ -4,6 +4,8 @@ import (
|
||||
"fmt"
|
||||
"net/http"
|
||||
"slices"
|
||||
"strconv"
|
||||
"strings"
|
||||
"unicode"
|
||||
"unicode/utf8"
|
||||
)
|
||||
@@ -14,67 +16,97 @@ import (
|
||||
// a search result, Body and Html are simple extracts.
|
||||
type Search struct {
|
||||
Query string
|
||||
Items []Page
|
||||
Items []*Page
|
||||
Previous int
|
||||
Page int
|
||||
Next int
|
||||
Last int
|
||||
More bool
|
||||
Results bool
|
||||
}
|
||||
|
||||
func sortItems(a, b Page) int {
|
||||
// Sort by score
|
||||
if a.Score < b.Score {
|
||||
return 1
|
||||
} else if a.Score > b.Score {
|
||||
return -1
|
||||
}
|
||||
// If the score is the same and both page names start
|
||||
// with a number (like an ISO date), sort descending.
|
||||
ra, _ := utf8.DecodeRuneInString(a.Title)
|
||||
rb, _ := utf8.DecodeRuneInString(b.Title)
|
||||
if unicode.IsNumber(ra) && unicode.IsNumber(rb) {
|
||||
if a.Title < b.Title {
|
||||
return 1
|
||||
} else if a.Title > b.Title {
|
||||
// sortNames returns a sort function that sorts in three stages: 1.
|
||||
// whether the query string matches the page title; 2. descending if
|
||||
// the page titles start with a digit; 3. otherwise ascending.
|
||||
// Access to the index requires a read lock!
|
||||
func sortNames(q string) func (a, b string) int {
|
||||
return func (a, b string) int {
|
||||
// If only one page contains the query string, it
|
||||
// takes precedence.
|
||||
ia := strings.Contains(index.titles[a], q)
|
||||
ib := strings.Contains(index.titles[b], q)
|
||||
if (ia && !ib) {
|
||||
return -1
|
||||
} else if (!ia && ib) {
|
||||
return 1
|
||||
}
|
||||
// If both page names start with a number (like an ISO date),
|
||||
// sort descending.
|
||||
ra, _ := utf8.DecodeRuneInString(a)
|
||||
rb, _ := utf8.DecodeRuneInString(b)
|
||||
if unicode.IsNumber(ra) && unicode.IsNumber(rb) {
|
||||
if a < b {
|
||||
return 1
|
||||
} else if a > b {
|
||||
return -1
|
||||
} else {
|
||||
return 0
|
||||
}
|
||||
}
|
||||
// Otherwise sort ascending.
|
||||
if a < b {
|
||||
return -1
|
||||
} else if a > b {
|
||||
return 1
|
||||
} else {
|
||||
return 0
|
||||
}
|
||||
}
|
||||
// Otherwise sort ascending.
|
||||
if a.Title < b.Title {
|
||||
return -1
|
||||
} else if a.Title > b.Title {
|
||||
return 1
|
||||
} else {
|
||||
return 0
|
||||
}
|
||||
}
|
||||
|
||||
// loadAndSummarize loads the pages named and summarizes them for the
|
||||
// query given.
|
||||
func loadAndSummarize(names []string, q string) []Page {
|
||||
// Load and summarize the items.
|
||||
items := make([]Page, len(names))
|
||||
// load the pages named.
|
||||
func load(names []string) []*Page {
|
||||
items := make([]*Page, len(names))
|
||||
for i, name := range names {
|
||||
p, err := loadPage(name)
|
||||
if err != nil {
|
||||
fmt.Printf("Error loading %s\n", name)
|
||||
} else {
|
||||
p.summarize(q)
|
||||
items[i] = *p
|
||||
items[i] = p
|
||||
}
|
||||
}
|
||||
return items
|
||||
}
|
||||
|
||||
// itemsPerPage says how many items to print on a page of search
|
||||
// results.
|
||||
const itemsPerPage = 20
|
||||
|
||||
// search returns a sorted []Page where each page contains an extract
|
||||
// of the actual Page.Body in its Page.Html.
|
||||
func search(q string) []Page {
|
||||
// of the actual Page.Body in its Page.Html. Page size is 20. The
|
||||
// boolean return value indicates whether there are more results. The
// integer return value is the number of the last page of results.
|
||||
func search(q string, page int) ([]*Page, bool, int) {
|
||||
if len(q) == 0 {
|
||||
return make([]Page, 0)
|
||||
return make([]*Page, 0), false, 0
|
||||
}
|
||||
index.RLock()
|
||||
names := searchDocuments(q)
|
||||
items := loadAndSummarize(names, q)
|
||||
slices.SortFunc(items, sortItems)
|
||||
return items
|
||||
slices.SortFunc(names, sortNames(q))
|
||||
index.RUnlock()
|
||||
from := itemsPerPage*(page-1)
|
||||
if from > len(names) {
|
||||
return make([]*Page, 0), false, 0
|
||||
}
|
||||
to := from + itemsPerPage
|
||||
if to > len(names) {
|
||||
to = len(names)
|
||||
}
|
||||
items := load(names[from:to])
|
||||
for _, p := range items {
|
||||
p.score(q)
|
||||
p.summarize(q)
|
||||
}
|
||||
return items, to < len(names), len(names)/itemsPerPage+1
|
||||
}
|
||||
|
||||
// searchHandler presents a search result. It uses the query string in
|
||||
@@ -82,7 +114,12 @@ func search(q string) []Page {
|
||||
// page found, the HTML is just an extract of the actual body.
|
||||
func searchHandler(w http.ResponseWriter, r *http.Request) {
|
||||
q := r.FormValue("q")
|
||||
items := search(q)
|
||||
s := &Search{Query: q, Items: items, Results: len(items) > 0}
|
||||
page, err := strconv.Atoi(r.FormValue("page"))
|
||||
if err != nil {
|
||||
page = 1
|
||||
}
|
||||
items, more, last := search(q, page)
|
||||
s := &Search{Query: q, Items: items, Previous: page-1, Page: page, Next: page+1, Last: last,
|
||||
Results: len(items) > 0, More: more}
|
||||
renderTemplate(w, "search", s)
|
||||
}
|
||||
|
||||
@@ -23,13 +23,19 @@ img { max-width: 20%; }
|
||||
<a href="/view/index">Home</a>
|
||||
<form role="search" action="/search" method="GET">
|
||||
<label for="search">Search:</label>
|
||||
<input id="search" type="text" value="{{.Query}}" spellcheck="false" name="q" required>
|
||||
<input id="search" type="text" value="{{.Query}}" spellcheck="false" name="q" accesskey="f" required>
|
||||
<button>Go</button>
|
||||
</form>
|
||||
</header>
|
||||
<main id="main">
|
||||
<h1>Search for {{.Query}}</h1>
|
||||
{{if .Results}}
|
||||
<p>
|
||||
{{if gt .Page 2}}<a href="/search?q={{.Query}}&page=1">First</a>{{end}}
|
||||
{{if gt .Page 1}}<a href="/search?q={{.Query}}&page={{.Previous}}">Previous</a>{{end}}
|
||||
Page {{.Page}}
|
||||
{{if .More}}<a href="/search?q={{.Query}}&page={{.Next}}">Next</a>{{end}}
|
||||
{{if lt .Next .Last}}<a href="/search?q={{.Query}}&page={{.Last}}">Last</a>{{end}}
|
||||
{{range .Items}}
|
||||
<article lang="{{.Language}}">
|
||||
<p><a class="result" href="/view/{{.Name}}">{{.Title}}</a>
|
||||
|
||||
@@ -20,12 +20,12 @@ img { max-width: 100%; }
|
||||
<header>
|
||||
<a href="#main">Skip navigation</a>
|
||||
<a href="/view/index">Home</a>
|
||||
<a href="/edit/{{.Name}}">Edit</a>
|
||||
<a href="/add/{{.Name}}">Add</a>
|
||||
<a href="/upload/{{.Dir}}">Upload</a>
|
||||
<a href="/edit/{{.Name}}" accesskey="e">Edit</a>
|
||||
<a href="/add/{{.Name}}" accesskey="a">Add</a>
|
||||
<a href="/upload/{{.Dir}}" accesskey="u">Upload</a>
|
||||
<form role="search" action="/search" method="GET">
|
||||
<label for="search">Search:</label>
|
||||
<input id="search" type="text" spellcheck="false" name="q" required>
|
||||
<input id="search" type="text" spellcheck="false" name="q" accesskey="f" required>
|
||||
<button>Go</button>
|
||||
</form>
|
||||
</header>
|
||||
|
||||
Reference in New Issue
Block a user