3 Commits
timing ... v1.0

Author SHA1 Message Date
Alex Schroeder
9ee2af6093 Add access keys 2023-09-16 23:40:27 +02:00
Alex Schroeder
153a179d92 Search results are based on page titles only 2023-09-16 23:36:17 +02:00
Alex Schroeder
d9797aac75 Add pagination of the results
Sorting and scoring still requires the loading of all the found pages,
but the summary is only computed for the pages on the page.
2023-09-16 16:14:59 +02:00
10 changed files with 143 additions and 60 deletions

View File

@@ -119,7 +119,13 @@ is a byte array and that's why we need to call `printf`).
For the `search.html` template only:
`{{.Results}}` indicates if there were any search results.
`{{.Previous}}`, `{{.Page}}`, `{{.Next}}` and `{{.Last}}` are the
previous, current, next and last page number in the results since
doing arithmetic in templates is hard. The first page number is 1.
`{{.More}}` indicates if there are any more search results.
`{{.Results}}` indicates if there were any search results at all.
`{{.Items}}` is an array of pages, each containing a search result. A
search result is a page (with the properties seen above). Thus, to
@@ -433,6 +439,22 @@ and "rail", a search for "mail" returns a match because the trigrams
"mai" and "ail" are found. In this situation, the result has a score
of 0.
The sorting of all the pages, however, does not depend on scoring!
Computing the score is expensive because the page must be loaded from
disk. Therefore, results are sorted by title:
- If the page title contains the query string, it gets sorted first.
- If the page title begins with a number, it is sorted descending.
- All other pages follow, sorted ascending.
The effect is that first, the pages with matches in the page title are
shown, and then all the others. Within these two groups, the most
recent blog posts are shown first, if and only if the page title
begins with an ISO date like 2023-09-16.
The score and highlighting of snippets are used to help visitors decide
which links to click.
## Limitations
Page titles are filenames with `.md` appended. If your filesystem

View File

@@ -17,11 +17,14 @@ func commands() {
} else if len(os.Args) > 2 && os.Args[1] == "search" {
index.load()
for _, q := range os.Args[2:] {
items := search(q)
items, more, _ := search(q, 1)
fmt.Printf("Search %s: %d results\n", q, len(items))
for _, p := range items {
fmt.Printf("* %s (%d)\n", p.Title, p.Score)
}
if more {
fmt.Printf("There are more results\n")
}
}
} else {
fmt.Printf("Unknown command: %v\n", os.Args[1:])

View File

@@ -10,6 +10,6 @@ func TestLoadAndSearch(t *testing.T) {
index.reset()
go index.load()
q := "Oddµ"
pages := search(q)
pages, _, _ := search(q, 1)
assert.Zero(t, len(pages))
}

View File

@@ -21,6 +21,9 @@ type Index struct {
// documents is a map, mapping document ids of the index to
// page names.
documents map[trigram.DocID]string
// titles is a map, mapping page names to titles.
titles map[string]string
}
// idx is the global Index per wiki.
@@ -30,6 +33,7 @@ var index Index
// reset discards the trigram index together with the documents and
// titles maps, leaving all three fields nil.
func (idx *Index) reset() {
	idx.index, idx.documents, idx.titles = nil, nil, nil
}
// add reads a file and adds it to the index. This must happen while
@@ -47,8 +51,10 @@ func (idx *Index) add(path string, info fs.FileInfo, err error) error {
if err != nil {
return err
}
p.handleTitle(false)
id := idx.index.Add(strings.ToLower(string(p.Body)))
idx.documents[id] = p.Name
idx.titles[p.Name] = p.Title
return nil
}
@@ -59,6 +65,7 @@ func (idx *Index) load() (int, error) {
defer idx.Unlock()
idx.index = make(trigram.Index)
idx.documents = make(map[trigram.DocID]string)
idx.titles = make(map[string]string)
err := filepath.Walk(".", idx.add)
if err != nil {
idx.reset()
@@ -90,23 +97,27 @@ func (p *Page) updateIndex() {
o, err := loadPage(p.Name)
if err == nil {
index.index.Delete(strings.ToLower(string(o.Body)), id)
o.handleTitle(false)
delete(index.titles, o.Title)
}
index.index.Insert(strings.ToLower(string(p.Body)), id)
p.handleTitle(false)
index.titles[p.Name] = p.Title
}
}
// searchDocuments searches the index for a string and returns the
// names of the matching pages. The query is lower-cased and split
// into words; the trigrams of all the words are queried together.
//
// The caller must hold at least a read lock on the index. The
// function deliberately does not lock the index itself: search holds
// the read lock while calling it, and taking the same read lock a
// second time can deadlock as soon as a writer is waiting in between
// (the index lock is presumably a sync.RWMutex, which forbids
// recursive read locking).
func searchDocuments(q string) []string {
	var trigrams []trigram.T
	for _, word := range strings.Fields(strings.ToLower(q)) {
		trigrams = trigram.Extract(word, trigrams)
	}
	ids := index.index.QueryTrigrams(trigrams)
	// Translate the document ids of the index into page names.
	names := make([]string, len(ids))
	for i, id := range ids {
		names[i] = index.documents[id]
	}
	return names
}

View File

@@ -11,19 +11,19 @@ import (
// TestIndex loads the index, searches for "Oddµ" and checks that
// there are results, that no result title contains "<b>", that the
// query appears in the body or the title of every result, and that
// every result has a non-zero score.
func TestIndex(t *testing.T) {
index.load()
q := "Oddµ"
pages := search(q)
// search takes the query and a page number; only the items of the
// first page are inspected, the other two return values are ignored.
pages, _, _ := search(q, 1)
assert.NotZero(t, len(pages))
for _, p := range pages {
assert.NotContains(t, p.Title, "<b>")
assert.True(t, strings.Contains(string(p.Body), q) || strings.Contains(string(p.Title), q))
assert.NotZero(t, p.Score)
// The extra arguments form the failure message, naming the page
// whose score was zero.
assert.NotZero(t, p.Score, "Score %d for %s", p.Score, p.Name)
}
}
// TestSearchHashtag loads the index and checks that a search for the
// hashtag "#Another_Tag" returns at least one result.
func TestSearchHashtag(t *testing.T) {
index.load()
q := "#Another_Tag"
pages := search(q)
// Only the items of the first result page are needed here.
pages, _, _ := search(q, 1)
assert.NotZero(t, len(pages))
}
@@ -35,7 +35,7 @@ func TestIndexUpdates(t *testing.T) {
p.save()
// Find the phrase
pages := search("This is a test")
pages, _, _ := search("This is a test", 1)
found := false
for _, p := range pages {
if p.Name == name {
@@ -46,7 +46,7 @@ func TestIndexUpdates(t *testing.T) {
assert.True(t, found)
// Find the phrase, case insensitive
pages = search("this is a test")
pages, _, _ = search("this is a test", 1)
found = false
for _, p := range pages {
if p.Name == name {
@@ -57,7 +57,7 @@ func TestIndexUpdates(t *testing.T) {
assert.True(t, found)
// Find some words
pages = search("this test")
pages, _, _ = search("this test", 1)
found = false
for _, p := range pages {
if p.Name == name {
@@ -70,7 +70,7 @@ func TestIndexUpdates(t *testing.T) {
// Update the page and no longer find it with the old phrase
p = &Page{Name: name, Body: []byte("Guvf vf n grfg.")}
p.save()
pages = search("This is a test")
pages, _, _ = search("This is a test", 1)
found = false
for _, p := range pages {
if p.Name == name {
@@ -81,7 +81,7 @@ func TestIndexUpdates(t *testing.T) {
assert.False(t, found)
// Find page using a new word
pages = search("Guvf")
pages, _, _ = search("Guvf", 1)
found = false
for _, p := range pages {
if p.Name == name {

View File

@@ -186,10 +186,14 @@ func (p *Page) plainText() string {
return string(text)
}
// summarize for query string q sets Page.Html to an extract.
func (p *Page) summarize(q string) {
// score sets Page.Title and stores in Page.Score the sum of the
// scores of the page body and of the page title for query string q.
func (p *Page) score(q string) {
	// The title must be determined before it can be scored.
	p.handleTitle(true)
	body := score(q, string(p.Body))
	title := score(q, p.Title)
	p.Score = body + title
}
// summarize sets Page.Html to an extract and sets Page.Language.
func (p *Page) summarize(q string) {
t := p.plainText()
p.Html = sanitize(snippets(q, t))
p.Language = language(t)

View File

@@ -94,7 +94,7 @@ func TestScorePageAndMarkup(t *testing.T) {
s := `The Transjovian Council accepts new members. If you think we'd be a good fit, apply for an account. Contact [Alex Schroeder](https://alexschroeder.ch/wiki/Contact). Mail is best. Encrypted mail is best. [Delta Chat](https://delta.chat/de/) is a messenger app that uses encrypted mail. It's the bestest best.`
p := &Page{Title: "Test", Name: "Test", Body: []byte(s)}
q := "wiki"
p.summarize(q)
p.score(q)
// "wiki" is not visible in the plain text but the score is not affected:
// - wiki, all, whole, beginning, end (5)
if p.Score != 5 {

115
search.go
View File

@@ -4,6 +4,8 @@ import (
"fmt"
"net/http"
"slices"
"strconv"
"strings"
"unicode"
"unicode/utf8"
)
@@ -14,67 +16,97 @@ import (
// a search result, Body and Html are simple extracts.
type Search struct {
Query string
Items []Page
Items []*Page
Previous int
Page int
Next int
Last int
More bool
Results bool
}
func sortItems(a, b Page) int {
// Sort by score
if a.Score < b.Score {
return 1
} else if a.Score > b.Score {
return -1
}
// If the score is the same and both page names start
// with a number (like an ISO date), sort descending.
ra, _ := utf8.DecodeRuneInString(a.Title)
rb, _ := utf8.DecodeRuneInString(b.Title)
if unicode.IsNumber(ra) && unicode.IsNumber(rb) {
if a.Title < b.Title {
return 1
} else if a.Title > b.Title {
// sortNames returns a sort function that sorts in three stages: 1.
// whether the query string matches the page title; 2. descending if
// the page titles start with a digit; 3. otherwise ascending.
// Access to the index requires a read lock!
func sortNames(q string) func (a, b string) int {
return func (a, b string) int {
// If only one page contains the query string, it
// takes precedence.
ia := strings.Contains(index.titles[a], q)
ib := strings.Contains(index.titles[b], q)
if (ia && !ib) {
return -1
} else if (!ia && ib) {
return 1
}
// If both page names start with a number (like an ISO date),
// sort descending.
ra, _ := utf8.DecodeRuneInString(a)
rb, _ := utf8.DecodeRuneInString(b)
if unicode.IsNumber(ra) && unicode.IsNumber(rb) {
if a < b {
return 1
} else if a > b {
return -1
} else {
return 0
}
}
// Otherwise sort ascending.
if a < b {
return -1
} else if a > b {
return 1
} else {
return 0
}
}
// Otherwise sort ascending.
if a.Title < b.Title {
return -1
} else if a.Title > b.Title {
return 1
} else {
return 0
}
}
// loadAndSummarize loads the pages named and summarizes them for the
// query give.
func loadAndSummarize(names []string, q string) []Page {
// Load and summarize the items.
items := make([]Page, len(names))
// load the pages named.
func load(names []string) []*Page {
items := make([]*Page, len(names))
for i, name := range names {
p, err := loadPage(name)
if err != nil {
fmt.Printf("Error loading %s\n", name)
} else {
p.summarize(q)
items[i] = *p
items[i] = p
}
}
return items
}
// itemsPerPage says how many items to print on a page of search
// results.
const itemsPerPage = 20
// search returns a sorted []Page where each page contains an extract
// of the actual Page.Body in its Page.Html.
func search(q string) []Page {
// of the actual Page.Body in its Page.Html. Page size is 20. The
// boolean return value indicates whether there are more results.
func search(q string, page int) ([]*Page, bool, int) {
if len(q) == 0 {
return make([]Page, 0)
return make([]*Page, 0), false, 0
}
index.RLock()
names := searchDocuments(q)
items := loadAndSummarize(names, q)
slices.SortFunc(items, sortItems)
return items
slices.SortFunc(names, sortNames(q))
index.RUnlock()
from := itemsPerPage*(page-1)
if from > len(names) {
return make([]*Page, 0), false, 0
}
to := from + itemsPerPage
if to > len(names) {
to = len(names)
}
items := load(names[from:to])
for _, p := range items {
p.score(q)
p.summarize(q)
}
return items, to < len(names), len(names)/itemsPerPage+1
}
// searchHandler presents a search result. It uses the query string in
@@ -82,7 +114,12 @@ func search(q string) []Page {
// page found, the HTML is just an extract of the actual body.
// The "page" form value selects the page of the results; a missing,
// malformed or non-positive value selects the first page.
func searchHandler(w http.ResponseWriter, r *http.Request) {
	q := r.FormValue("q")
	page, err := strconv.Atoi(r.FormValue("page"))
	if err != nil || page < 1 {
		// Page numbers start at 1; anything else would produce a
		// negative offset into the results.
		page = 1
	}
	items, more, last := search(q, page)
	// Previous and Next are precomputed because doing arithmetic in
	// templates is hard.
	s := &Search{
		Query:    q,
		Items:    items,
		Previous: page - 1,
		Page:     page,
		Next:     page + 1,
		Last:     last,
		Results:  len(items) > 0,
		More:     more,
	}
	renderTemplate(w, "search", s)
}

View File

@@ -23,13 +23,19 @@ img { max-width: 20%; }
<a href="/view/index">Home</a>
<form role="search" action="/search" method="GET">
<label for="search">Search:</label>
<input id="search" type="text" value="{{.Query}}" spellcheck="false" name="q" required>
<input id="search" type="text" value="{{.Query}}" spellcheck="false" name="q" accesskey="f" required>
<button>Go</button>
</form>
</header>
<main id="main">
<h1>Search for {{.Query}}</h1>
{{if .Results}}
<p>
{{if gt .Page 2}}<a href="/search?q={{.Query}}&page=1">First</a>{{end}}
{{if gt .Page 1}}<a href="/search?q={{.Query}}&page={{.Previous}}">Previous</a>{{end}}
Page {{.Page}}
{{if .More}}<a href="/search?q={{.Query}}&page={{.Next}}">Next</a>{{end}}
{{if lt .Next .Last}}<a href="/search?q={{.Query}}&page={{.Last}}">Last</a>{{end}}
{{range .Items}}
<article lang="{{.Language}}">
<p><a class="result" href="/view/{{.Name}}">{{.Title}}</a>

View File

@@ -20,12 +20,12 @@ img { max-width: 100%; }
<header>
<a href="#main">Skip navigation</a>
<a href="/view/index">Home</a>
<a href="/edit/{{.Name}}">Edit</a>
<a href="/add/{{.Name}}">Add</a>
<a href="/upload/{{.Dir}}">Upload</a>
<a href="/edit/{{.Name}}" accesskey="e">Edit</a>
<a href="/add/{{.Name}}" accesskey="a">Add</a>
<a href="/upload/{{.Dir}}" accesskey="u">Upload</a>
<form role="search" action="/search" method="GET">
<label for="search">Search:</label>
<input id="search" type="text" spellcheck="false" name="q" required>
<input id="search" type="text" spellcheck="false" name="q" accesskey="f" required>
<button>Go</button>
</form>
</header>