forked from mirror/oddmu
Compare commits
5 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
2188f99dea | ||
|
|
c063174063 | ||
|
|
e1258da63b | ||
|
|
8eb700fb0b | ||
|
|
7514c2173b |
14
accounts.go
14
accounts.go
@@ -2,10 +2,10 @@ package main
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"github.com/gomarkdown/markdown/ast"
|
||||
"github.com/gomarkdown/markdown/parser"
|
||||
"io"
|
||||
"log"
|
||||
"net/http"
|
||||
"os"
|
||||
"sync"
|
||||
@@ -82,7 +82,7 @@ func account(p *parser.Parser, data []byte, offset int) (int, ast.Node) {
|
||||
uri, ok := accounts.uris[string(account)]
|
||||
defer accounts.RUnlock()
|
||||
if !ok {
|
||||
fmt.Printf("Looking up %s\n", account)
|
||||
log.Printf("Looking up %s\n", account)
|
||||
uri = "https://" + string(domain) + "/users/" + string(user[1:])
|
||||
accounts.uris[string(account)] = uri // prevent more lookings
|
||||
go lookUpAccountUri(string(account), string(domain))
|
||||
@@ -103,26 +103,26 @@ func lookUpAccountUri(account, domain string) {
|
||||
uri := "https://" + domain + "/.well-known/webfinger"
|
||||
resp, err := http.Get(uri + "?resource=acct:" + account)
|
||||
if err != nil {
|
||||
fmt.Printf("Failed to look up %s: %s\n", account, err)
|
||||
log.Printf("Failed to look up %s: %s", account, err)
|
||||
return
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
body, err := io.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
fmt.Printf("Failed to read from %s: %s\n", account, err)
|
||||
log.Printf("Failed to read from %s: %s", account, err)
|
||||
return
|
||||
}
|
||||
var wf WebFinger
|
||||
err = json.Unmarshal([]byte(body), &wf)
|
||||
if err != nil {
|
||||
fmt.Printf("Failed to parse the JSON from %s: %s\n", account, err)
|
||||
log.Printf("Failed to parse the JSON from %s: %s", account, err)
|
||||
return
|
||||
}
|
||||
uri, err = parseWebFinger(body)
|
||||
if err != nil {
|
||||
fmt.Printf("Could not find profile URI for %s: %s\n", account, err)
|
||||
log.Printf("Could not find profile URI for %s: %s", account, err)
|
||||
}
|
||||
fmt.Printf("Found profile for %s: %s\n", account, uri)
|
||||
log.Printf("Found profile for %s: %s", account, uri)
|
||||
accounts.Lock()
|
||||
defer accounts.Unlock()
|
||||
accounts.uris[account] = uri
|
||||
|
||||
@@ -29,7 +29,6 @@ It's not `)}
|
||||
HTTPRedirectTo(t, makeHandler(appendHandler, true), "POST", "/append/testdata/fire", data, "/view/testdata/fire")
|
||||
assert.Regexp(t, regexp.MustCompile("It’s not barbecue"),
|
||||
assert.HTTPBody(makeHandler(viewHandler, true), "GET", "/view/testdata/fire", nil))
|
||||
|
||||
t.Cleanup(func() {
|
||||
_ = os.RemoveAll("testdata")
|
||||
})
|
||||
|
||||
2
feed.go
2
feed.go
@@ -1,11 +1,11 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"github.com/gomarkdown/markdown"
|
||||
"github.com/gomarkdown/markdown/ast"
|
||||
"github.com/gomarkdown/markdown/parser"
|
||||
"html/template"
|
||||
"bytes"
|
||||
"os"
|
||||
"path"
|
||||
"time"
|
||||
|
||||
@@ -7,7 +7,7 @@ import (
|
||||
)
|
||||
|
||||
func TestFeed(t *testing.T) {
|
||||
assert.Contains(t,
|
||||
assert.Contains(t,
|
||||
assert.HTTPBody(makeHandler(viewHandler, true), "GET", "/view/index.rss", nil),
|
||||
"Welcome to Oddµ")
|
||||
}
|
||||
@@ -16,7 +16,7 @@ func TestFeed(t *testing.T) {
|
||||
func TestFeedItems(t *testing.T) {
|
||||
_ = os.RemoveAll("testdata")
|
||||
index.load()
|
||||
|
||||
|
||||
p1 := &Page{Name: "testdata/cactus", Body: []byte(`# Cactus
|
||||
Green head and white hair
|
||||
A bench in the evening sun
|
||||
|
||||
1
go.mod
1
go.mod
@@ -4,7 +4,6 @@ go 1.21.0
|
||||
|
||||
require (
|
||||
github.com/anthonynsimon/bild v0.13.0
|
||||
github.com/dgryski/go-trigram v0.0.0-20160407183937-79ec494e1ad0
|
||||
github.com/gomarkdown/markdown v0.0.0-20230912175223-14b07df9d538
|
||||
github.com/google/subcommands v1.2.0
|
||||
github.com/hexops/gotextdiff v1.0.3
|
||||
|
||||
2
go.sum
2
go.sum
@@ -10,8 +10,6 @@ github.com/coreos/go-semver v0.2.0/go.mod h1:nnelYz7RCh+5ahJtPPxZlU+153eP4D4r3Ee
|
||||
github.com/cpuguy83/go-md2man v1.0.10/go.mod h1:SmD6nW6nTyfqj6ABTjUi3V3JVMnlJmwcJI5acqYI6dE=
|
||||
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
|
||||
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||
github.com/dgryski/go-trigram v0.0.0-20160407183937-79ec494e1ad0 h1:b+7JSiBM+hnLQjP/lXztks5hnLt1PS46hktG9VOJgzo=
|
||||
github.com/dgryski/go-trigram v0.0.0-20160407183937-79ec494e1ad0/go.mod h1:qzKC/DpcxK67zaSHdCmIv3L9WJViHVinYXN2S7l3RM8=
|
||||
github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo=
|
||||
github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk=
|
||||
github.com/gomarkdown/markdown v0.0.0-20230912175223-14b07df9d538 h1:ePDpFu7l0QUV46/9A7icfL2wvIOzTJLCWh4RO2NECzE=
|
||||
|
||||
@@ -30,7 +30,7 @@ func (cmd *htmlCmd) Execute(_ context.Context, f *flag.FlagSet, _ ...interface{}
|
||||
}
|
||||
|
||||
func htmlCli(w io.Writer, useTemplate bool, args []string) subcommands.ExitStatus {
|
||||
for _, arg := range args {
|
||||
for _, arg := range args {
|
||||
p, err := loadPage(arg)
|
||||
if err != nil {
|
||||
fmt.Fprintf(w, "Cannot load %s: %s\n", arg, err)
|
||||
|
||||
@@ -2,9 +2,9 @@ package main
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"github.com/google/subcommands"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"testing"
|
||||
"github.com/google/subcommands"
|
||||
)
|
||||
|
||||
func TestHtmlCmd(t *testing.T) {
|
||||
|
||||
224
index.go
224
index.go
@@ -1,43 +1,97 @@
|
||||
// Read Artem Krylysov's blog post on full text search as an
|
||||
// introduction.
|
||||
// https://artem.krylysov.com/blog/2020/07/28/lets-build-a-full-text-search-engine/
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
trigram "github.com/dgryski/go-trigram"
|
||||
"io/fs"
|
||||
"log"
|
||||
"path/filepath"
|
||||
"sort"
|
||||
"strings"
|
||||
"sync"
|
||||
)
|
||||
|
||||
type docid uint
|
||||
|
||||
// Index contains the two maps used for search. Make sure to lock and
|
||||
// unlock as appropriate.
|
||||
type Index struct {
|
||||
sync.RWMutex
|
||||
|
||||
// index is a struct containing the trigram index for search.
|
||||
// It is generated at startup and updated after every page
|
||||
// edit. The index is case-insensitive.
|
||||
index trigram.Index
|
||||
// next_id is the number of the next document added to the index
|
||||
next_id docid
|
||||
|
||||
// documents is a map, mapping document ids of the index to
|
||||
// page names.
|
||||
documents map[trigram.DocID]string
|
||||
// token is an inverted index mapping tokens to document ids.
|
||||
token map[string][]docid
|
||||
|
||||
// names is a map, mapping page names to titles.
|
||||
// documents is a map, mapping document ids to page names.
|
||||
documents map[docid]string
|
||||
|
||||
// titles is a map, mapping page names to titles.
|
||||
titles map[string]string
|
||||
}
|
||||
|
||||
// index is the global Index per wiki.
|
||||
var index Index
|
||||
|
||||
// reset resets the Index. This assumes that the index is locked!
|
||||
func (idx *Index) reset() {
|
||||
idx.index = nil
|
||||
idx.token = nil
|
||||
idx.documents = nil
|
||||
idx.titles = nil
|
||||
}
|
||||
|
||||
// addDocument adds the text as a new document. This assumes that the
|
||||
// index is locked!
|
||||
func (idx *Index) addDocument(text []byte) docid {
|
||||
id := idx.next_id
|
||||
idx.next_id++
|
||||
for _, token := range hashtags(text) {
|
||||
ids := idx.token[token]
|
||||
// Don't add same ID more than once. Checking the last
|
||||
// position of the []docid works because the id is
|
||||
// always a new one, i.e. the last one, if at all.
|
||||
if ids != nil && ids[len(ids)-1] == id {
|
||||
continue
|
||||
}
|
||||
idx.token[token] = append(ids, id)
|
||||
}
|
||||
return id
|
||||
}
|
||||
|
||||
// deleteDocument deletes the text as a new document. The id can no
|
||||
// longer be used. This assumes that the index is locked!
|
||||
func (idx *Index) deleteDocument(text []byte, id docid) {
|
||||
for _, token := range hashtags(text) {
|
||||
ids := index.token[token]
|
||||
// Tokens can appear multiple times in a text but they
|
||||
// can only be deleted once. deleted.
|
||||
if ids == nil {
|
||||
continue
|
||||
}
|
||||
// If the token appears only in this document, remove
|
||||
// the whole entry.
|
||||
if len(ids) == 1 && ids[0] == id {
|
||||
delete(index.token, token)
|
||||
continue
|
||||
}
|
||||
// Otherwise, remove the token from the index.
|
||||
i := sort.Search(len(ids), func(i int) bool { return ids[i] >= id })
|
||||
if i != -1 && i < len(ids) && ids[i] == id {
|
||||
copy(ids[i:], ids[i+1:])
|
||||
index.token[token] = ids[:len(ids)-1]
|
||||
continue
|
||||
}
|
||||
// If none of the above, then our docid wasn't
|
||||
// indexed. This shouldn't happen, either.
|
||||
log.Printf("The index for token %s does not contain doc id %d", token, id)
|
||||
}
|
||||
delete(index.documents, id)
|
||||
}
|
||||
|
||||
// add reads a file and adds it to the index. This must happen while
|
||||
// the idx is locked, which is true when called from loadIndex.
|
||||
// the idx is locked.
|
||||
func (idx *Index) add(path string, info fs.FileInfo, err error) error {
|
||||
if err != nil {
|
||||
return err
|
||||
@@ -52,7 +106,8 @@ func (idx *Index) add(path string, info fs.FileInfo, err error) error {
|
||||
return err
|
||||
}
|
||||
p.handleTitle(false)
|
||||
id := idx.index.Add(strings.ToLower(string(p.Body)))
|
||||
|
||||
id := idx.addDocument(p.Body)
|
||||
idx.documents[id] = p.Name
|
||||
idx.titles[p.Name] = p.Title
|
||||
return nil
|
||||
@@ -63,8 +118,8 @@ func (idx *Index) add(path string, info fs.FileInfo, err error) error {
|
||||
func (idx *Index) load() (int, error) {
|
||||
idx.Lock()
|
||||
defer idx.Unlock()
|
||||
idx.index = make(trigram.Index)
|
||||
idx.documents = make(map[trigram.DocID]string)
|
||||
idx.token = make(map[string][]docid)
|
||||
idx.documents = make(map[docid]string)
|
||||
idx.titles = make(map[string]string)
|
||||
err := filepath.Walk(".", idx.add)
|
||||
if err != nil {
|
||||
@@ -75,15 +130,23 @@ func (idx *Index) load() (int, error) {
|
||||
return n, nil
|
||||
}
|
||||
|
||||
// dump prints the index to the log for debugging. Must already be readlocked.
|
||||
func (idx *Index) dump() {
|
||||
index.RLock()
|
||||
defer index.RUnlock()
|
||||
for token, ids := range idx.token {
|
||||
log.Printf("%s: %v", token, ids)
|
||||
}
|
||||
}
|
||||
|
||||
// updateIndex updates the index for a single page. The old text is
|
||||
// loaded from the disk and removed from the index first, if it
|
||||
// exists.
|
||||
func (p *Page) updateIndex() {
|
||||
index.Lock()
|
||||
defer index.Unlock()
|
||||
var id trigram.DocID
|
||||
// This function does not rely on files actually existing, so
|
||||
// let's quickly find the document id.
|
||||
var id docid
|
||||
// Reverse lookup! At least it's in memory.
|
||||
for docId, name := range index.documents {
|
||||
if name == p.Name {
|
||||
id = docId
|
||||
@@ -91,33 +154,120 @@ func (p *Page) updateIndex() {
|
||||
}
|
||||
}
|
||||
if id == 0 {
|
||||
id = index.index.Add(strings.ToLower(string(p.Body)))
|
||||
id = index.addDocument(p.Body)
|
||||
index.documents[id] = p.Name
|
||||
index.titles[p.Name] = p.Title
|
||||
} else {
|
||||
o, err := loadPage(p.Name)
|
||||
if err == nil {
|
||||
index.index.Delete(strings.ToLower(string(o.Body)), id)
|
||||
o.handleTitle(false)
|
||||
delete(index.titles, o.Title)
|
||||
if o, err := loadPage(p.Name); err == nil {
|
||||
index.deleteDocument(o.Body, id)
|
||||
}
|
||||
index.index.Insert(strings.ToLower(string(p.Body)), id)
|
||||
// Do not reuse the old id. We need a new one for
|
||||
// indexing to work.
|
||||
id = index.addDocument(p.Body)
|
||||
index.documents[id] = p.Name
|
||||
p.handleTitle(false)
|
||||
// The page name stays the same but the title may have
|
||||
// changed.
|
||||
index.titles[p.Name] = p.Title
|
||||
}
|
||||
}
|
||||
|
||||
// searchDocuments searches the index for a string. This requires the
|
||||
// index to be locked.
|
||||
func searchDocuments(q string) []string {
|
||||
words := strings.Fields(strings.ToLower(q))
|
||||
var trigrams []trigram.T
|
||||
for _, word := range words {
|
||||
trigrams = trigram.Extract(word, trigrams)
|
||||
// removeFromIndex removes the page from the index. Do this when
|
||||
// deleting a page.
|
||||
func (p *Page) removeFromIndex() {
|
||||
index.Lock()
|
||||
defer index.Unlock()
|
||||
var id docid
|
||||
// Reverse lookup! At least it's in memory.
|
||||
for docId, name := range index.documents {
|
||||
if name == p.Name {
|
||||
id = docId
|
||||
break
|
||||
}
|
||||
}
|
||||
ids := index.index.QueryTrigrams(trigrams)
|
||||
names := make([]string, len(ids))
|
||||
for i, id := range ids {
|
||||
names[i] = index.documents[id]
|
||||
if id == 0 {
|
||||
log.Printf("Page %s is not indexed", p.Name)
|
||||
return
|
||||
}
|
||||
return names
|
||||
o, err := loadPage(p.Name)
|
||||
if err != nil {
|
||||
log.Printf("Page %s cannot removed from the index: %s", p.Name, err)
|
||||
return
|
||||
}
|
||||
index.deleteDocument(o.Body, id)
|
||||
}
|
||||
|
||||
// search searches the index for a query string and returns page
|
||||
// names.
|
||||
func (idx *Index) search(q string) []string {
|
||||
index.RLock()
|
||||
defer index.RUnlock()
|
||||
names := make([]string, 0)
|
||||
hashtags := hashtags([]byte(q))
|
||||
if len(hashtags) > 0 {
|
||||
var r []docid
|
||||
for _, token := range hashtags {
|
||||
if ids, ok := idx.token[token]; ok {
|
||||
if r == nil {
|
||||
r = ids
|
||||
} else {
|
||||
r = intersection(r, ids)
|
||||
}
|
||||
} else {
|
||||
// Token doesn't exist therefore abort search.
|
||||
return nil
|
||||
}
|
||||
}
|
||||
for _, id := range r {
|
||||
names = append(names, idx.documents[id])
|
||||
}
|
||||
} else {
|
||||
for _, name := range idx.documents {
|
||||
names = append(names, name)
|
||||
}
|
||||
}
|
||||
return grep(tokens(q), names)
|
||||
}
|
||||
|
||||
func grep(tokens, names []string) []string {
|
||||
results := make([]string, 0)
|
||||
NameLoop:
|
||||
for _, name := range names {
|
||||
p, err := loadPage(name)
|
||||
if err != nil {
|
||||
log.Printf("Cannot load %s: %s", name, err)
|
||||
continue
|
||||
}
|
||||
body := strings.ToLower(string(p.Body))
|
||||
for _, token := range tokens {
|
||||
if !strings.Contains(body, token) {
|
||||
continue NameLoop
|
||||
}
|
||||
}
|
||||
results = append(results, name)
|
||||
}
|
||||
return results
|
||||
}
|
||||
|
||||
// intersection returns the set intersection between a and b.
|
||||
// a and b have to be sorted in ascending order and contain no duplicates.
|
||||
func intersection(a []docid, b []docid) []docid {
|
||||
maxLen := len(a)
|
||||
if len(b) > maxLen {
|
||||
maxLen = len(b)
|
||||
}
|
||||
r := make([]docid, 0, maxLen)
|
||||
var i, j int
|
||||
for i < len(a) && j < len(b) {
|
||||
if a[i] < b[j] {
|
||||
i++
|
||||
} else if a[i] > b[j] {
|
||||
j++
|
||||
} else {
|
||||
r = append(r, a[i])
|
||||
i++
|
||||
j++
|
||||
}
|
||||
}
|
||||
return r
|
||||
}
|
||||
|
||||
@@ -27,9 +27,10 @@ func TestSearchHashtag(t *testing.T) {
|
||||
assert.NotZero(t, len(pages))
|
||||
}
|
||||
|
||||
// wipes testdata
|
||||
func TestIndexUpdates(t *testing.T) {
|
||||
name := "test"
|
||||
_ = os.Remove(name + ".md")
|
||||
_ = os.RemoveAll("testdata")
|
||||
name := "testdata/test"
|
||||
index.load()
|
||||
p := &Page{Name: name, Body: []byte("This is a test.")}
|
||||
p.save()
|
||||
@@ -92,6 +93,6 @@ func TestIndexUpdates(t *testing.T) {
|
||||
assert.True(t, found)
|
||||
|
||||
t.Cleanup(func() {
|
||||
_ = os.Remove(name + ".md")
|
||||
_ = os.RemoveAll("testdata")
|
||||
})
|
||||
}
|
||||
|
||||
13
page.go
13
page.go
@@ -2,9 +2,9 @@ package main
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"github.com/microcosm-cc/bluemonday"
|
||||
"html/template"
|
||||
"log"
|
||||
"net/url"
|
||||
"os"
|
||||
"path/filepath"
|
||||
@@ -27,10 +27,12 @@ type Page struct {
|
||||
Hashtags []string
|
||||
}
|
||||
|
||||
// santize uses bluemonday to sanitize the HTML.
|
||||
// No exceptions are made because this is used for snippets.
|
||||
// sanitizeStrict uses bluemonday to sanitize the HTML. An exception is made
|
||||
// for the b tag because this is used for snippets.
|
||||
func sanitizeStrict(s string) template.HTML {
|
||||
return template.HTML(bluemonday.StrictPolicy().Sanitize(s))
|
||||
policy := bluemonday.StrictPolicy()
|
||||
policy.AllowElements("b")
|
||||
return template.HTML(policy.Sanitize(s))
|
||||
}
|
||||
|
||||
// sanitizeBytes uses bluemonday to sanitize the HTML.
|
||||
@@ -60,6 +62,7 @@ func (p *Page) save() error {
|
||||
filename := p.Name + ".md"
|
||||
s := bytes.ReplaceAll(p.Body, []byte{'\r'}, []byte{})
|
||||
if len(s) == 0 {
|
||||
p.removeFromIndex()
|
||||
_ = os.Rename(filename, filename+"~")
|
||||
return os.Remove(filename)
|
||||
}
|
||||
@@ -69,7 +72,7 @@ func (p *Page) save() error {
|
||||
if d != "." {
|
||||
err := os.MkdirAll(d, 0755)
|
||||
if err != nil {
|
||||
fmt.Printf("Creating directory %s failed", d)
|
||||
log.Printf("Creating directory %s failed: %s", d, err)
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
@@ -35,7 +35,7 @@ func wikiLink(p *parser.Parser, fn func(p *parser.Parser, data []byte, offset in
|
||||
// hashtag returns an inline parser function. This indirection is
|
||||
// required because we want to receive an array of hashtags found.
|
||||
func hashtag() (func(p *parser.Parser, data []byte, offset int) (int, ast.Node), *[]string) {
|
||||
hashtags := make([]string,0)
|
||||
hashtags := make([]string, 0)
|
||||
return func(p *parser.Parser, data []byte, offset int) (int, ast.Node) {
|
||||
data = data[offset:]
|
||||
i := 0
|
||||
@@ -49,8 +49,8 @@ func hashtag() (func(p *parser.Parser, data []byte, offset int) (int, ast.Node),
|
||||
hashtags = append(hashtags, string(data[1:i]))
|
||||
link := &ast.Link{
|
||||
AdditionalAttributes: []string{`class="tag"`},
|
||||
Destination: append([]byte("/search?q=%23"), data[1:i]...),
|
||||
Title: data[0:i],
|
||||
Destination: append([]byte("/search?q=%23"), data[1:i]...),
|
||||
Title: data[0:i],
|
||||
}
|
||||
text := bytes.ReplaceAll(data[0:i], []byte("_"), []byte(" "))
|
||||
ast.AppendChild(link, &ast.Text{Leaf: ast.Leaf{Literal: text}})
|
||||
|
||||
@@ -2,8 +2,8 @@ package main
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/google/subcommands"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"os"
|
||||
"testing"
|
||||
)
|
||||
@@ -32,7 +32,7 @@ You are no planet`)}
|
||||
1 change was made.
|
||||
This is a dry run. Use -confirm to make it happen.
|
||||
`
|
||||
|
||||
|
||||
b := new(bytes.Buffer)
|
||||
s := replaceCli(b, false, []string{`\bno planet`, `planetoid`})
|
||||
assert.Equal(t, subcommands.ExitSuccess, s)
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"log"
|
||||
"net/http"
|
||||
"slices"
|
||||
"strconv"
|
||||
@@ -70,7 +70,7 @@ func load(names []string) []*Page {
|
||||
for i, name := range names {
|
||||
p, err := loadPage(name)
|
||||
if err != nil {
|
||||
fmt.Printf("Error loading %s\n", name)
|
||||
log.Printf("Error loading %s: %s", name, err)
|
||||
} else {
|
||||
items[i] = p
|
||||
}
|
||||
@@ -89,10 +89,8 @@ func search(q string, page int) ([]*Page, bool, int) {
|
||||
if len(q) == 0 {
|
||||
return make([]*Page, 0), false, 0
|
||||
}
|
||||
index.RLock()
|
||||
names := searchDocuments(q)
|
||||
names := index.search(q)
|
||||
slices.SortFunc(names, sortNames(q))
|
||||
index.RUnlock()
|
||||
from := itemsPerPage * (page - 1)
|
||||
if from > len(names) {
|
||||
return make([]*Page, 0), false, 0
|
||||
|
||||
@@ -1,20 +1,27 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"flag"
|
||||
"fmt"
|
||||
"github.com/google/subcommands"
|
||||
"io"
|
||||
"io/fs"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"slices"
|
||||
"strings"
|
||||
)
|
||||
|
||||
type searchCmd struct {
|
||||
page int
|
||||
page int
|
||||
exact bool
|
||||
}
|
||||
|
||||
func (cmd *searchCmd) SetFlags(f *flag.FlagSet) {
|
||||
f.IntVar(&cmd.page, "page", 1, "the page in the search result set")
|
||||
f.BoolVar(&cmd.exact, "exact", false, "look for exact matches (do not use the trigram index)")
|
||||
}
|
||||
|
||||
// Name returns the name of the subcommand, "search".
func (*searchCmd) Name() string {
	return "search"
}
|
||||
@@ -29,15 +36,21 @@ func (*searchCmd) Usage() string {
|
||||
}
|
||||
|
||||
func (cmd *searchCmd) Execute(_ context.Context, f *flag.FlagSet, _ ...interface{}) subcommands.ExitStatus {
|
||||
return searchCli(os.Stdout, cmd.page, f.Args())
|
||||
return searchCli(os.Stdout, cmd.page, cmd.exact, f.Args())
|
||||
}
|
||||
|
||||
// searchCli runs the search command on the command line. It is used
|
||||
// here with an io.Writer for easy testing.
|
||||
func searchCli(w io.Writer, n int, args []string) subcommands.ExitStatus {
|
||||
index.load()
|
||||
func searchCli(w io.Writer, n int, exact bool, args []string) subcommands.ExitStatus {
|
||||
var fn func(q string, n int) ([]*Page, bool, int)
|
||||
if exact {
|
||||
fn = searchExact
|
||||
} else {
|
||||
index.load()
|
||||
fn = search
|
||||
}
|
||||
for _, q := range args {
|
||||
items, more, _ := search(q, n)
|
||||
items, more, _ := fn(q, n)
|
||||
if len(items) == 1 {
|
||||
fmt.Fprintf(w, "Search for %s, page %d: 1 result\n", q, n)
|
||||
} else {
|
||||
@@ -52,3 +65,58 @@ func searchCli(w io.Writer, n int, args []string) subcommands.ExitStatus {
|
||||
}
|
||||
return subcommands.ExitSuccess
|
||||
}
|
||||
|
||||
// searchExact opens all the files and searches them, one by one.
|
||||
func searchExact(q string, page int) ([]*Page, bool, int) {
|
||||
if len(q) == 0 {
|
||||
return make([]*Page, 0), false, 0
|
||||
}
|
||||
terms := bytes.Fields([]byte(q))
|
||||
pages := make(map[string]*Page)
|
||||
names := make([]string, 0)
|
||||
index.titles = make(map[string]string)
|
||||
err := filepath.Walk(".", func(path string, info fs.FileInfo, err error) error {
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
filename := path
|
||||
if info.IsDir() || strings.HasPrefix(filename, ".") || !strings.HasSuffix(filename, ".md") {
|
||||
return nil
|
||||
}
|
||||
name := strings.TrimSuffix(filename, ".md")
|
||||
p, err := loadPage(name)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
for _, term := range terms {
|
||||
if !bytes.Contains(p.Body, term) {
|
||||
return nil
|
||||
}
|
||||
}
|
||||
p.handleTitle(false)
|
||||
pages[p.Name] = p
|
||||
index.titles[p.Name] = p.Title
|
||||
names = append(names, p.Name)
|
||||
return nil
|
||||
})
|
||||
if err != nil {
|
||||
return make([]*Page, 0), false, 0
|
||||
}
|
||||
slices.SortFunc(names, sortNames(q))
|
||||
from := itemsPerPage * (page - 1)
|
||||
if from > len(names) {
|
||||
return make([]*Page, 0), false, 0
|
||||
}
|
||||
to := from + itemsPerPage
|
||||
if to > len(names) {
|
||||
to = len(names)
|
||||
}
|
||||
items := make([]*Page, 0)
|
||||
for i := from; i < to; i++ {
|
||||
p := pages[names[i]]
|
||||
p.score(q)
|
||||
p.summarize(q)
|
||||
items = append(items, p)
|
||||
}
|
||||
return items, to < len(names), len(names)/itemsPerPage + 1
|
||||
}
|
||||
|
||||
@@ -2,14 +2,14 @@ package main
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"github.com/google/subcommands"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"testing"
|
||||
"github.com/google/subcommands"
|
||||
)
|
||||
|
||||
func TestSearchCmd(t *testing.T) {
|
||||
b := new(bytes.Buffer)
|
||||
s := searchCli(b, 1, []string{"oddµ"})
|
||||
s := searchCli(b, 1, false, []string{"oddµ"})
|
||||
assert.Equal(t, subcommands.ExitSuccess, s)
|
||||
r := `Search for oddµ, page 1: 2 results
|
||||
* [Oddµ: A minimal wiki](README) (5)
|
||||
|
||||
@@ -3,8 +3,8 @@ package main
|
||||
import (
|
||||
"github.com/stretchr/testify/assert"
|
||||
"net/url"
|
||||
"testing"
|
||||
"os"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestSearch(t *testing.T) {
|
||||
@@ -19,12 +19,14 @@ func TestSearchQuestionmark(t *testing.T) {
|
||||
_ = os.RemoveAll("testdata")
|
||||
p := &Page{Name: "testdata/Odd?", Body: []byte(`# Even?
|
||||
|
||||
yes or no?`)}
|
||||
We look at the plants.
|
||||
They need water. We need us.
|
||||
The silence streches.`)}
|
||||
p.save()
|
||||
data := url.Values{}
|
||||
data.Set("q", "yes")
|
||||
data.Set("q", "look")
|
||||
body := assert.HTTPBody(searchHandler, "GET", "/search", data)
|
||||
assert.Contains(t, body, "yes or no?")
|
||||
assert.Contains(t, body, "We <b>look</b>")
|
||||
assert.NotContains(t, body, "Odd?")
|
||||
assert.Contains(t, body, "Even?")
|
||||
}
|
||||
|
||||
19
snippets.go
19
snippets.go
@@ -1,20 +1,21 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"log"
|
||||
"regexp"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// re returns a regular expression matching any word in q.
|
||||
func re(q string) (*regexp.Regexp, error) {
|
||||
q = regexp.QuoteMeta(q)
|
||||
re, err := regexp.Compile(`\s+`)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
fields := strings.Fields(q)
|
||||
quoted := make([]string, len(fields))
|
||||
for i, w := range fields {
|
||||
quoted[i] = regexp.QuoteMeta(w)
|
||||
}
|
||||
words := re.ReplaceAllString(q, "|")
|
||||
re, err = regexp.Compile(`(?i)(` + words + `)`)
|
||||
re, err := regexp.Compile(`(?i)(` + strings.Join(quoted, "|") + `)`)
|
||||
if err != nil {
|
||||
log.Printf("Cannot compile %s %v: %s", q, quoted, err)
|
||||
return nil, err
|
||||
}
|
||||
return re, nil
|
||||
@@ -26,12 +27,16 @@ func snippets(q string, s string) string {
|
||||
maxsnippets := 4
|
||||
re, err := re(q)
|
||||
// If the compilation didn't work, truncate and return
|
||||
if err != nil || len(s) <= snippetlen {
|
||||
if err != nil {
|
||||
if len(s) > 400 {
|
||||
s = s[0:400] + " …"
|
||||
}
|
||||
return s
|
||||
}
|
||||
// Short cut for short pages
|
||||
if len(s) <= snippetlen {
|
||||
return highlight(q, re, s)
|
||||
}
|
||||
// show a snippet from the beginning of the document
|
||||
j := strings.LastIndex(s[:snippetlen], " ")
|
||||
if j == -1 {
|
||||
|
||||
56
tokenizer.go
Normal file
56
tokenizer.go
Normal file
@@ -0,0 +1,56 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"strings"
|
||||
"unicode"
|
||||
"unicode/utf8"
|
||||
)
|
||||
|
||||
// tokenize splits the text into its alphanumeric tokens: every run of
// letters and numbers is a token and any other character acts as a
// separator.
func tokenize(text string) []string {
	isSeparator := func(r rune) bool {
		return !(unicode.IsLetter(r) || unicode.IsNumber(r))
	}
	return strings.FieldsFunc(text, isSeparator)
}
|
||||
|
||||
// lowercaseFilter returns a new slice holding the lower case form of
// every token, in the same order. The argument is left untouched and
// the result is never nil.
func lowercaseFilter(tokens []string) []string {
	lowered := make([]string, 0, len(tokens))
	for _, token := range tokens {
		lowered = append(lowered, strings.ToLower(token))
	}
	return lowered
}
|
||||
|
||||
// tokens returns a slice of alphanumeric tokens.
|
||||
func tokens(text string) []string {
|
||||
tokens := tokenize(text)
|
||||
tokens = lowercaseFilter(tokens)
|
||||
return tokens
|
||||
}
|
||||
|
||||
// hashtags returns the hashtags found in s, in order of appearance
// and including repetitions. A hashtag is a '#' followed by at least
// one letter, number or underscore; it is returned in lower case with
// the leading '#'. The result is never nil.
func hashtags(s []byte) []string {
	found := make([]string, 0)
	for {
		start := bytes.IndexRune(s, '#')
		if start == -1 {
			return found
		}
		// Extend the tag over every rune allowed in a hashtag.
		end := start + 1
		for end < len(s) {
			r, size := utf8.DecodeRune(s[end:])
			if !(unicode.IsLetter(r) || unicode.IsNumber(r) || r == '_') {
				break
			}
			end += size
		}
		if end-start > 1 { // not just "#"
			found = append(found, string(bytes.ToLower(s[start:end])))
		}
		// Continue the scan behind the tag (or behind the lone '#').
		s = s[end:]
	}
}
|
||||
13
tokenizer_test.go
Normal file
13
tokenizer_test.go
Normal file
@@ -0,0 +1,13 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"github.com/stretchr/testify/assert"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestTokenizer(t *testing.T) {
|
||||
assert.EqualValues(t, []string{}, tokens(""), "empty string")
|
||||
assert.EqualValues(t, []string{"franc"}, tokens("Franc"), "lower case")
|
||||
assert.EqualValues(t, []string{"i", "don", "t", "know", "what", "to", "do"}, tokens("I don't know what to do."))
|
||||
assert.EqualValues(t, []string{"#truth"}, hashtags([]byte("This is boring. #Truth")), "hashtags")
|
||||
}
|
||||
4
view.go
4
view.go
@@ -3,8 +3,8 @@ package main
|
||||
import (
|
||||
"net/http"
|
||||
"os"
|
||||
"time"
|
||||
"strings"
|
||||
"time"
|
||||
)
|
||||
|
||||
// rootHandler just redirects to /view/index.
|
||||
@@ -31,7 +31,7 @@ func viewHandler(w http.ResponseWriter, r *http.Request, name string) {
|
||||
file = false
|
||||
if strings.HasSuffix(fn, ".rss") {
|
||||
rss = true
|
||||
name = fn[0:len(fn)-4]
|
||||
name = fn[0 : len(fn)-4]
|
||||
fn = name
|
||||
}
|
||||
fn += ".md"
|
||||
|
||||
@@ -88,14 +88,13 @@ I like spring better
|
||||
assert.NoError(t, err)
|
||||
h := makeHandler(viewHandler, true)
|
||||
assert.Equal(t, []string{fi.ModTime().UTC().Format(http.TimeFormat)},
|
||||
HTTPHeaders(h, "GET", "/view/testdata/now", nil, "Last-Modified"))
|
||||
HTTPHeaders(h, "GET", "/view/testdata/now", nil, "Last-Modified"))
|
||||
HTTPStatusCodeIfModifiedSince(t, h, "/view/testdata/now", fi.ModTime())
|
||||
t.Cleanup(func() {
|
||||
_ = os.RemoveAll("testdata")
|
||||
})
|
||||
}
|
||||
|
||||
|
||||
// wipes testdata
|
||||
func TestPageHead(t *testing.T) {
|
||||
_ = os.RemoveAll("testdata")
|
||||
|
||||
14
wiki.go
14
wiki.go
@@ -3,9 +3,9 @@ package main
|
||||
import (
|
||||
"context"
|
||||
"flag"
|
||||
"fmt"
|
||||
"github.com/google/subcommands"
|
||||
"html/template"
|
||||
"log"
|
||||
"net/http"
|
||||
"os"
|
||||
"regexp"
|
||||
@@ -69,12 +69,12 @@ func getPort() string {
|
||||
// and after. For testing, call index.load directly and skip the
|
||||
// messages.
|
||||
func scheduleLoadIndex() {
|
||||
fmt.Print("Indexing pages\n")
|
||||
log.Print("Indexing pages")
|
||||
n, err := index.load()
|
||||
if err == nil {
|
||||
fmt.Printf("Indexed %d pages\n", n)
|
||||
log.Printf("Indexed %d pages", n)
|
||||
} else {
|
||||
fmt.Println("Indexing failed")
|
||||
log.Printf("Indexing failed: %s", err)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -82,9 +82,9 @@ func scheduleLoadIndex() {
|
||||
// and after. For testing, call loadLanguages directly and skip the
|
||||
// messages.
|
||||
func scheduleLoadLanguages() {
|
||||
fmt.Print("Loading languages\n")
|
||||
log.Print("Loading languages")
|
||||
n := loadLanguages()
|
||||
fmt.Printf("Loaded %d languages\n", n)
|
||||
log.Printf("Loaded %d languages", n)
|
||||
}
|
||||
|
||||
func serve() {
|
||||
@@ -101,7 +101,7 @@ func serve() {
|
||||
go scheduleLoadLanguages()
|
||||
initAccounts()
|
||||
port := getPort()
|
||||
fmt.Printf("Serving a wiki on port %s\n", port)
|
||||
log.Printf("Serving a wiki on port %s", port)
|
||||
http.ListenAndServe(":"+port, nil)
|
||||
}
|
||||
|
||||
|
||||
@@ -66,6 +66,7 @@ func HTTPUploadAndRedirectTo(t *testing.T, handler http.HandlerFunc, url, conten
|
||||
"Expected HTTP redirect location %s for %q but received %v", destination, url, headers)
|
||||
return isRedirectCode
|
||||
}
|
||||
|
||||
// HTTPStatusCodeIfModifiedSince checks that the request results in a
|
||||
// 304 response for the given time.
|
||||
func HTTPStatusCodeIfModifiedSince(t *testing.T, handler http.HandlerFunc, url string, ti time.Time) {
|
||||
|
||||
Reference in New Issue
Block a user