forked from mirror/oddmu
This change depends on a change to the markdown library: specifically, the parser's InsideLink field must be public. As a consequence, the #like_this hashtag from the README.md in the source directory is no longer available, so that test had to be rewritten. Another change, made to reduce the number of false hashtags, was to use the hashtag parser in all situations: it is also used to identify hashtags in the search query string. The parser does not automatically convert the matches to lower case, however, so this has to be done when indexing documents and when searching for hashtags. The hashtags command for the command line no longer prints a hash in front of the tags.
178 lines
5.4 KiB
Go
178 lines
5.4 KiB
Go
package main
|
|
|
|
import (
|
|
"bytes"
|
|
"github.com/gomarkdown/markdown"
|
|
"github.com/gomarkdown/markdown/ast"
|
|
"github.com/gomarkdown/markdown/html"
|
|
"github.com/gomarkdown/markdown/parser"
|
|
"net/url"
|
|
"path"
|
|
"path/filepath"
|
|
)
|
|
|
|
// wikiLink returns an inline parser function. This indirection is
// required because we want to call the previous definition in case
// this is not a wikiLink.
func wikiLink(fn func(p *parser.Parser, data []byte, offset int) (int, ast.Node)) func(p *parser.Parser, data []byte, offset int) (int, ast.Node) {
	return func(p *parser.Parser, original []byte, offset int) (int, ast.Node) {
		data := original[offset:]
		n := len(data)
		// minimum: [[X]]
		if n < 5 || data[1] != '[' {
			// Not a wiki link: delegate to the previous '[' handler.
			return fn(p, original, offset)
		}
		// Scan the link text starting after the "[[".
		// NOTE(review): the loop stops as soon as data[i] is ']' OR
		// data[i+1] is ']', not only on a full "]]" pair — confirm this
		// is the intended terminator test.
		// NOTE(review): if no terminator occurs before the end of the
		// input, the code still builds a link and returns i+3, which can
		// exceed len(data) — confirm the gomarkdown inline loop
		// tolerates an over-long consumed count.
		i := 2
		for i+1 < n && data[i] != ']' && data[i+1] != ']' {
			i++
		}
		// The link text, i.e. the page name between the brackets.
		text := data[2 : i+1]
		link := &ast.Link{
			// The page name doubles as the link target and therefore
			// must be path-escaped.
			Destination: []byte(url.PathEscape(string(text))),
		}
		ast.AppendChild(link, &ast.Text{Leaf: ast.Leaf{Literal: text}})
		// Consumed bytes: "[[" + text + "]]".
		return i + 3, link
	}
}
|
|
|
|
// hashtag returns an inline parser function. This indirection is
|
|
// required because we want to receive an array of hashtags found.
|
|
// The hashtags in the array keep their case.
|
|
func hashtag() (func(p *parser.Parser, data []byte, offset int) (int, ast.Node), *[]string) {
|
|
hashtags := make([]string, 0)
|
|
return func(p *parser.Parser, data []byte, offset int) (int, ast.Node) {
|
|
if p.InsideLink {
|
|
return 0, nil
|
|
}
|
|
data = data[offset:]
|
|
i := 0
|
|
n := len(data)
|
|
for i < n && !parser.IsSpace(data[i]) {
|
|
i++
|
|
}
|
|
if i <= 1 {
|
|
return 0, nil
|
|
}
|
|
hashtags = append(hashtags, string(data[1:i]))
|
|
link := &ast.Link{
|
|
AdditionalAttributes: []string{`class="tag"`},
|
|
Destination: append([]byte("/search/?q=%23"), data[1:i]...),
|
|
}
|
|
text := bytes.ReplaceAll(data[0:i], []byte("_"), []byte(" "))
|
|
ast.AppendChild(link, &ast.Text{Leaf: ast.Leaf{Literal: text}})
|
|
return i, link
|
|
}, &hashtags
|
|
}
|
|
|
|
// wikiParser returns a parser with the Oddmu specific changes. Specifically: [[wiki links]], #hash_tags,
|
|
// @webfinger@accounts. It also uses the CommonExtensions and Block Attributes, and no MathJax ($).
|
|
func wikiParser() (*parser.Parser, *[]string) {
|
|
extensions := (parser.CommonExtensions | parser.AutoHeadingIDs | parser.Attributes) & ^parser.MathJax
|
|
p := parser.NewWithExtensions(extensions)
|
|
prev := p.RegisterInline('[', nil)
|
|
p.RegisterInline('[', wikiLink(prev))
|
|
fn, hashtags := hashtag()
|
|
p.RegisterInline('#', fn)
|
|
if useWebfinger {
|
|
p.RegisterInline('@', accountLink)
|
|
parser.EscapeChars = append(parser.EscapeChars, '@')
|
|
}
|
|
return p, hashtags
|
|
}
|
|
|
|
// wikiRenderer is a Renderer for Markdown that adds lazy loading of images and disables fractions support. Remember
|
|
// that there is no HTML sanitization.
|
|
func wikiRenderer() *html.Renderer {
|
|
// sync with staticPage
|
|
htmlFlags := html.CommonFlags & ^html.SmartypantsFractions | html.LazyLoadImages
|
|
opts := html.RendererOptions{Flags: htmlFlags}
|
|
renderer := html.NewRenderer(opts)
|
|
return renderer
|
|
}
|
|
|
|
// renderHtml renders the Page.Body to HTML and sets Page.Html, Page.Hashtags, and escapes Page.Name.
|
|
func (p *Page) renderHtml() {
|
|
parser, hashtags := wikiParser()
|
|
renderer := wikiRenderer()
|
|
maybeUnsafeHTML := markdown.ToHTML(p.Body, parser, renderer)
|
|
p.Name = nameEscape(p.Name)
|
|
p.Html = unsafeBytes(maybeUnsafeHTML)
|
|
p.Hashtags = *hashtags
|
|
}
|
|
|
|
// plainText renders the Page.Body to plain text and returns it,
|
|
// ignoring all the Markdown and all the newlines. The result is one
|
|
// long single line of text.
|
|
func (p *Page) plainText() string {
|
|
parser := parser.New()
|
|
doc := markdown.Parse(p.Body, parser)
|
|
text := []byte("")
|
|
ast.WalkFunc(doc, func(node ast.Node, entering bool) ast.WalkStatus {
|
|
if entering && node.AsLeaf() != nil {
|
|
text = append(text, node.AsLeaf().Literal...)
|
|
text = append(text, []byte(" ")...)
|
|
}
|
|
return ast.GoToNext
|
|
})
|
|
// Some Markdown still contains newlines
|
|
for i, c := range text {
|
|
if c == '\n' {
|
|
text[i] = ' '
|
|
}
|
|
}
|
|
// Remove trailing space
|
|
for len(text) > 0 && text[len(text)-1] == ' ' {
|
|
text = text[0 : len(text)-1]
|
|
}
|
|
return string(text)
|
|
}
|
|
|
|
// images returns an array of ImageData.
|
|
func (p *Page) images() []ImageData {
|
|
dir := path.Dir(filepath.ToSlash(p.Name))
|
|
images := make([]ImageData, 0)
|
|
parser := parser.New()
|
|
doc := markdown.Parse(p.Body, parser)
|
|
ast.WalkFunc(doc, func(node ast.Node, entering bool) ast.WalkStatus {
|
|
if entering {
|
|
switch v := node.(type) {
|
|
case *ast.Image:
|
|
// not an absolute URL, not a full URL, not a mailto: URI
|
|
text := toString(v)
|
|
if len(text) > 0 {
|
|
name := path.Join(dir, string(v.Destination))
|
|
image := ImageData{Title: text, Name: name}
|
|
images = append(images, image)
|
|
}
|
|
return ast.SkipChildren
|
|
}
|
|
}
|
|
return ast.GoToNext
|
|
})
|
|
return images
|
|
}
|
|
|
|
// hashtags returns an array of hashtags
|
|
func hashtags(s []byte) []string {
|
|
parser, hashtags := wikiParser()
|
|
markdown.Parse(s, parser)
|
|
return *hashtags
|
|
}
|
|
|
|
// toString for a node returns the text nodes' literals, concatenated. There is no whitespace added so the expectation
|
|
// is that there is only one child node. Otherwise, there may be a space missing between the literals, depending on the
|
|
// exact child nodes they belong to.
|
|
func toString(node ast.Node) string {
|
|
b := new(bytes.Buffer)
|
|
ast.WalkFunc(node, func(node ast.Node, entering bool) ast.WalkStatus {
|
|
if entering {
|
|
switch v := node.(type) {
|
|
case *ast.Text:
|
|
b.Write(v.Literal)
|
|
}
|
|
}
|
|
return ast.GoToNext
|
|
})
|
|
return b.String()
|
|
}
|