Files
oddmu/parser.go
Alex Schroeder e1ba007f97 Don't link hashtags in link text
This change depends on a change to the markdown library. Specifically,
the parser's InsideLink must be public.

This means that the #like_this hashtag from the README.md in the
source directory is no longer available, so that test had to be
rewritten.

Another change to reduce the number of false hashtags was to use the
hashtag parser for all situations: It's also used to identify hashtags
in the search query string. The parser doesn't automatically turn the
matches to lower-case, however, so that has to be done when indexing
documents and when searching for hashtags.

The hashtags command for the command line no longer prints a hash for
all the tags.
2025-02-07 20:05:36 +01:00

178 lines
5.4 KiB
Go

package main
import (
"bytes"
"github.com/gomarkdown/markdown"
"github.com/gomarkdown/markdown/ast"
"github.com/gomarkdown/markdown/html"
"github.com/gomarkdown/markdown/parser"
"net/url"
"path"
"path/filepath"
)
// wikiLink returns an inline parser function for [[wiki links]]. This
// indirection is required because we want to call the previous
// definition (fn) in case this is not a wiki link.
//
// A wiki link is "[[", followed by a non-empty text that contains no
// ']', followed by "]]". The text is used verbatim as the link text and,
// path-escaped, as the link destination. Everything else (no second
// '[', an empty "[[]]", a lone ']' inside the text, or a missing "]]")
// is passed on to fn unchanged, so malformed input is never turned into
// a link and no bytes are swallowed.
func wikiLink(fn func(p *parser.Parser, data []byte, offset int) (int, ast.Node)) func(p *parser.Parser, data []byte, offset int) (int, ast.Node) {
	return func(p *parser.Parser, original []byte, offset int) (int, ast.Node) {
		data := original[offset:]
		n := len(data)
		// minimum: [[X]]
		if n < 5 || data[1] != '[' {
			return fn(p, original, offset)
		}
		// Find the first ']' after the opening "[[". It must exist, must
		// not come immediately (empty text) and must be followed by a
		// second ']'.
		rel := bytes.IndexByte(data[2:], ']')
		end := 2 + rel
		if rel <= 0 || end+1 >= n || data[end+1] != ']' {
			return fn(p, original, offset)
		}
		text := data[2:end]
		link := &ast.Link{
			Destination: []byte(url.PathEscape(string(text))),
		}
		ast.AppendChild(link, &ast.Text{Leaf: ast.Leaf{Literal: text}})
		// Consumed bytes: "[[" + text + "]]".
		return end + 2, link
	}
}
// hashtag builds the inline parser callback for '#' and returns it
// together with a pointer to the slice in which the callback collects
// every hashtag it recognises. The collected hashtags keep their case
// and are stored without the leading '#'.
func hashtag() (func(p *parser.Parser, data []byte, offset int) (int, ast.Node), *[]string) {
	found := []string{}
	handler := func(p *parser.Parser, data []byte, offset int) (int, ast.Node) {
		// No hashtag links inside the text of another link.
		if p.InsideLink {
			return 0, nil
		}
		rest := data[offset:]
		// The candidate runs up to the first byte parser.IsSpace accepts,
		// or to the end of the input.
		end := 0
		for end < len(rest) {
			if parser.IsSpace(rest[end]) {
				break
			}
			end++
		}
		// Nothing follows the first byte: not a hashtag.
		if end <= 1 {
			return 0, nil
		}
		tag := rest[1:end]
		found = append(found, string(tag))
		dest := append([]byte("/search/?q=%23"), tag...)
		// The link text keeps the leading byte; underscores are shown as spaces.
		label := bytes.ReplaceAll(rest[:end], []byte("_"), []byte(" "))
		node := &ast.Link{
			AdditionalAttributes: []string{`class="tag"`},
			Destination:          dest,
		}
		ast.AppendChild(node, &ast.Text{Leaf: ast.Leaf{Literal: label}})
		return end, node
	}
	return handler, &found
}
// wikiParser returns a parser with the Oddmu specific changes. Specifically: [[wiki links]], #hash_tags,
// @webfinger@accounts. It also uses the CommonExtensions, AutoHeadingIDs and Block Attributes, and no MathJax ($).
//
// The second return value points to the slice that the hashtag callback fills while the parser is used; read it
// only after parsing.
func wikiParser() (*parser.Parser, *[]string) {
	extensions := (parser.CommonExtensions | parser.AutoHeadingIDs | parser.Attributes) & ^parser.MathJax
	p := parser.NewWithExtensions(extensions)
	// Remember the handler previously registered for '[' so that wikiLink can delegate to it.
	prev := p.RegisterInline('[', nil)
	p.RegisterInline('[', wikiLink(prev))
	fn, hashtags := hashtag()
	p.RegisterInline('#', fn)
	if useWebfinger {
		p.RegisterInline('@', accountLink)
		// parser.EscapeChars is package-level state shared by all parsers. Add '@' only if it is missing,
		// otherwise the slice grows by one byte for every parser created. NOTE(review): the update itself
		// is not synchronised.
		if bytes.IndexByte(parser.EscapeChars, '@') < 0 {
			parser.EscapeChars = append(parser.EscapeChars, '@')
		}
	}
	return p, hashtags
}
// wikiRenderer builds the HTML renderer used for wiki pages: the common flags without Smartypants fractions, plus
// lazy loading of images. Remember that there is no HTML sanitization.
func wikiRenderer() *html.Renderer {
	// sync with staticPage
	flags := html.CommonFlags&^html.SmartypantsFractions | html.LazyLoadImages
	return html.NewRenderer(html.RendererOptions{Flags: flags})
}
// renderHtml converts Page.Body to HTML using the wiki parser and renderer. It stores the result in Page.Html,
// stores the hashtags collected while parsing in Page.Hashtags, and replaces Page.Name with its escaped form.
func (p *Page) renderHtml() {
	mdParser, found := wikiParser()
	rendered := markdown.ToHTML(p.Body, mdParser, wikiRenderer())
	p.Name = nameEscape(p.Name)
	p.Html = unsafeBytes(rendered)
	// Read the collected hashtags only now, after the body has been parsed.
	p.Hashtags = *found
}
// plainText parses Page.Body with the default Markdown parser and
// returns the literals of all leaf nodes, separated by single spaces.
// Newlines are replaced by spaces and trailing spaces are removed, so
// the result is one long single line of text.
func (p *Page) plainText() string {
	mdParser := parser.New()
	doc := markdown.Parse(p.Body, mdParser)
	var buf bytes.Buffer
	ast.WalkFunc(doc, func(node ast.Node, entering bool) ast.WalkStatus {
		if !entering {
			return ast.GoToNext
		}
		if leaf := node.AsLeaf(); leaf != nil {
			buf.Write(leaf.Literal)
			buf.WriteByte(' ')
		}
		return ast.GoToNext
	})
	// Some literals still contain newlines.
	flat := bytes.ReplaceAll(buf.Bytes(), []byte("\n"), []byte(" "))
	// Drop the separator after the last literal and any other trailing spaces.
	return string(bytes.TrimRight(flat, " "))
}
// images returns an array of ImageData, one entry for every image in
// Page.Body that has a non-empty text and a destination relative to the
// page. Title is the image text; Name is the destination joined to the
// directory of the page.
//
// Destinations that are absolute paths ("/…"), full URLs ("…://…") or
// mailto: URIs are skipped: joining them to the page directory would
// produce a name that does not point to the image.
func (p *Page) images() []ImageData {
	dir := path.Dir(filepath.ToSlash(p.Name))
	images := make([]ImageData, 0)
	mdParser := parser.New()
	doc := markdown.Parse(p.Body, mdParser)
	ast.WalkFunc(doc, func(node ast.Node, entering bool) ast.WalkStatus {
		if !entering {
			return ast.GoToNext
		}
		v, ok := node.(*ast.Image)
		if !ok {
			return ast.GoToNext
		}
		dest := v.Destination
		// not an absolute URL, not a full URL, not a mailto: URI
		relative := !bytes.HasPrefix(dest, []byte("/")) &&
			!bytes.Contains(dest, []byte("://")) &&
			!bytes.HasPrefix(dest, []byte("mailto:"))
		if text := toString(v); relative && len(text) > 0 {
			images = append(images, ImageData{Title: text, Name: path.Join(dir, string(dest))})
		}
		// The image text has been collected via toString; no need to descend.
		return ast.SkipChildren
	})
	return images
}
// hashtags parses s with the wiki parser and returns the hashtags
// collected while parsing.
func hashtags(s []byte) []string {
	mdParser, found := wikiParser()
	// Only the side effect of parsing matters here; the document is discarded.
	markdown.Parse(s, mdParser)
	return *found
}
// toString concatenates the literals of all text nodes found at or below node. No whitespace is inserted between
// them, so the expectation is that there is only one such node. Otherwise a space may be missing between the
// literals, depending on the exact child nodes they belong to.
func toString(node ast.Node) string {
	var buf bytes.Buffer
	ast.WalkFunc(node, func(n ast.Node, entering bool) ast.WalkStatus {
		if text, ok := n.(*ast.Text); ok && entering {
			buf.Write(text.Literal)
		}
		return ast.GoToNext
	})
	return buf.String()
}