oddmu/missing_cmd.go

package main

import (
	"context"
	"flag"
	"fmt"
	"io"
	"net/url"
	"os"
	"path"
	"strings"

	"github.com/gomarkdown/markdown"
	"github.com/gomarkdown/markdown/ast"
	"github.com/google/subcommands"
)

type missingCmd struct {
}

func (*missingCmd) Name() string     { return "missing" }
func (*missingCmd) Synopsis() string { return "list missing pages" }
func (*missingCmd) Usage() string {
	return `missing:
  Listing pages with links to missing pages. This command does not
  understand links to directories being redirected to index pages.
  A link such as [up](..) is reported as a link to a missing page.
  Rewrite it as [up](../index) for it to work as intended.
`
}

func (cmd *missingCmd) SetFlags(f *flag.FlagSet) {
}

func (cmd *missingCmd) Execute(_ context.Context, f *flag.FlagSet, _ ...interface{}) subcommands.ExitStatus {
	n, err := index.load()
	if err != nil {
		fmt.Fprintf(os.Stderr, "Index load: %s\n", err)
		return subcommands.ExitFailure
	}
	fmt.Fprintf(os.Stderr, "Indexed %d pages\n", n)
	return missingCli(os.Stdout, &index)
}

// missingCli implements the finding of links to missing pages. In order to make testing easier, it takes a Writer and
// an indexStore. The Writer is important so that test code can provide a buffer instead of os.Stdout; the indexStore is
// important so that test code can ensure no other test running in parallel can interfere with the list of known pages
// (by adding or deleting pages).
func missingCli(w io.Writer, idx *indexStore) subcommands.ExitStatus {
	found := false
	for name := range idx.titles {
		p, err := loadPage(name)
		if err != nil {
			fmt.Fprintf(os.Stderr, "Loading %s: %s\n", name, err)
			return subcommands.ExitFailure
		}
		for _, link := range p.links() {
			u, err := url.Parse(link)
			if err != nil {
				fmt.Fprintln(os.Stderr, p.Name, err)
				return subcommands.ExitFailure
			}
			if u.Scheme == "" && u.Path != "" && !strings.HasPrefix(u.Path, "/") {
				// feeds can work if the matching page works
				u.Path = strings.TrimSuffix(u.Path, ".rss")
				// links to the source file can work
				u.Path = strings.TrimSuffix(u.Path, ".md")
				// pages containing a colon need the ./ prefix
				u.Path = strings.TrimPrefix(u.Path, "./")
				// check whether the destination is a known page
				destination, err := url.PathUnescape(u.Path)
				if err != nil {
					fmt.Fprintf(os.Stderr, "Cannot decode %s: %s\n", link, err)
					return subcommands.ExitFailure
				}
				_, ok := idx.titles[destination]
				// links to directories can work
				if !ok {
					_, ok = idx.titles[path.Join(destination, "index")]
				}
				if !ok {
					if !found {
						fmt.Fprintln(w, "Page\tMissing")
						found = true
					}
					fmt.Fprintf(w, "%s\t%s\n", p.Name, link)
				}
			}
		}
	}
	if !found {
		fmt.Fprintln(w, "No missing pages found.")
	}
	return subcommands.ExitSuccess
}

// links parses the page content and returns an array of link destinations.
func (p *Page) links() []string {
	var links []string
	parser, _ := wikiParser()
	doc := markdown.Parse(p.Body, parser)
	ast.WalkFunc(doc, func(node ast.Node, entering bool) ast.WalkStatus {
		if entering {
			if v, ok := node.(*ast.Link); ok {
				link := string(v.Destination)
				url, err := url.Parse(link)
				if err != nil {
					// no error reporting
					return ast.GoToNext
				}
				if url.IsAbs() {
					links = append(links, link)
				} else {
					dir := p.Dir()
					links = append(links, path.Join(dir, link))
				}
			}
		}
		return ast.GoToNext
	})
	return links
}