diff --git a/hashtags_cmd.go b/hashtags_cmd.go index eb7bbb9..ac6f594 100644 --- a/hashtags_cmd.go +++ b/hashtags_cmd.go @@ -5,15 +5,26 @@ import ( "flag" "fmt" "github.com/google/subcommands" + "github.com/gomarkdown/markdown" + "github.com/gomarkdown/markdown/ast" + "github.com/hexops/gotextdiff" + "github.com/hexops/gotextdiff/myers" + "github.com/hexops/gotextdiff/span" "io" "os" + "regexp" "sort" + "strings" ) type hashtagsCmd struct { + update bool + dryRun bool } func (cmd *hashtagsCmd) SetFlags(f *flag.FlagSet) { + f.BoolVar(&cmd.update, "update", false, "create and update hashtag pages") + f.BoolVar(&cmd.dryRun, "dry-run", false, "only report the changes it would make") } func (*hashtagsCmd) Name() string { return "hashtags" } @@ -25,6 +36,9 @@ func (*hashtagsCmd) Usage() string { } func (cmd *hashtagsCmd) Execute(_ context.Context, f *flag.FlagSet, _ ...interface{}) subcommands.ExitStatus { + if cmd.update { + return hashtagsUpdateCli(os.Stdout, cmd.dryRun) + } return hashtagsCli(os.Stdout) } @@ -57,3 +71,138 @@ func hashtagsCli(w io.Writer) subcommands.ExitStatus { return subcommands.ExitSuccess } + +// hashtagsUpdateCli runs the hashtags command on the command line and creates and updates the hashtag pages in the +// current directory. That is, pages in subdirectories are skipped! It is used here with an io.Writer for easy testing. 
+func hashtagsUpdateCli(w io.Writer, dryRun bool) subcommands.ExitStatus { + index.load() + // no locking necessary since this is for the command-line + namesMap := make(map[string]string) + for hashtag, docids := range index.token { + if len(docids) <= 5 { + if dryRun { + fmt.Fprintf(w, "Skipping #%s because there are not enough entries (%d)\n", hashtag, len(docids)) + } + continue + } + title, ok := namesMap[hashtag] + if (!ok) { + title = hashtagName(namesMap, hashtag, docids) + namesMap[hashtag] = title + } + pageName := strings.ReplaceAll(title, " ", "_") + h, err := loadPage(pageName) + original := "" + new := false + if err != nil { + new = true + h = &Page{Name: pageName, Body: []byte("# " + title + "\n\n#" + pageName + "\n\nBlog posts:\n\n")} + } else { + original = string(h.Body) + } + for _, docid := range docids { + name := index.documents[docid] + if strings.Contains(name, "/") { + continue + } + p, err := loadPage(name) + if err != nil { + fmt.Fprintf(w, "Loading %s: %s\n", name, err) + return subcommands.ExitFailure + } + if !p.IsBlog() { + continue + } + p.handleTitle(false) + if p.Title == "" { + p.Title = p.Name + } + esc := nameEscape(p.Base()) + link := "* [" + p.Title + "](" + esc + ")\n" + // I guess & used to get escaped and now no longer does + re := regexp.MustCompile(`(?m)^\* \[[^\]]+\]\(` + strings.ReplaceAll(esc, "&", "(&|%26)") + `\)\n`) + addLinkToPage(h, link, re) + } + // only save if something changed + if string(h.Body) != original { + if dryRun { + if new { + fmt.Fprintf(w, "Creating %s.md\n", title) + } else { + fmt.Fprintf(w, "Updating %s.md\n", title) + } + fn := h.Name + ".md" + edits := myers.ComputeEdits(span.URIFromPath(fn), original, string(h.Body)) + diff := fmt.Sprint(gotextdiff.ToUnified(fn + "~", fn, original, edits)) + fmt.Fprint(w, diff) + } else { + err = h.save() + if err != nil { + fmt.Fprintf(w, "Saving hashtag %s failed: %s", hashtag, err) + return subcommands.ExitFailure + } + } + } + } + return 
subcommands.ExitSuccess +} + +// Go through all the documents in the same directory and look for hashtag matches in the parsed Markdown in order to +// determine the most likely capitalization. +func hashtagName (namesMap map[string]string, hashtag string, docids []docid) string { + candidate := make(map[string]int) + var mostPopular string + for _, docid := range docids { + name := index.documents[docid] + if strings.Contains(name, "/") { + continue + } + p, err := loadPage(name) + if err != nil { + continue + } + // parsing finds all the hashtags + parser, _ := wikiParser() + doc := markdown.Parse(p.Body, parser) + ast.WalkFunc(doc, func(node ast.Node, entering bool) ast.WalkStatus { + if entering { + switch v := node.(type) { + case *ast.Link: + for _, attr := range v.AdditionalAttributes { + if attr == `class="tag"` { + tagName := []byte("") + ast.WalkFunc(v, func(node ast.Node, entering bool) ast.WalkStatus { + if entering && node.AsLeaf() != nil { + tagName = append(tagName, node.AsLeaf().Literal...) 
+ } + return ast.GoToNext + }) + tag := string(tagName[1:]) + if strings.EqualFold(hashtag, strings.ReplaceAll(tag, " ", "_")) { + _, ok := candidate[tag] + if ok { + candidate[tag] += 1 + } else { + candidate[tag] = 1 + } + } + } + } + } + } + return ast.GoToNext + }) + count := 0 + for key, val := range candidate { + if val > count { + mostPopular = key + count = val + } + } + // shortcut + if count >= 5 { + return mostPopular + } + } + return mostPopular +} diff --git a/man/oddmu-hashtags.1 b/man/oddmu-hashtags.1 index 96c05eb..9fc3cd1 100644 --- a/man/oddmu-hashtags.1 +++ b/man/oddmu-hashtags.1 @@ -5,20 +5,31 @@ .nh .ad l .\" Begin generated content: -.TH "ODDMU-HASHTAGS" "1" "2024-08-29" +.TH "ODDMU-HASHTAGS" "1" "2025-08-09" .PP .SH NAME .PP -oddmu-hashtags - count the hashtags used +oddmu-hashtags - work with hashtags .PP .SH SYNOPSIS .PP \fBoddmu hashtags\fR .PP +\fBoddmu hashtags -update\fR [\fB-dry-run\fR] +.PP .SH DESCRIPTION .PP -The "hashtags" subcommand counts all the hashtags used and lists them, separated -by a TAB character.\& +By default, the "hashtags" subcommand counts all the hashtags used and lists +them, separated by a TAB character.\& +.PP +With the \fB-update\fR flag, the hashtag pages are updated with links to all the blog +pages having the corresponding tag.\& This is only necessary when migrating a +collection of Markdown files.\& Ordinarily, Oddmu maintains the hashtag pages +automatically.\& When writing pages offline, use \fIoddmu-notify\fR(1) to update the +hashtag pages.\& +.PP +Use the \fB-dry-run\fR flag to see what would change with the \fB-update\fR flag without +actually changing any files.\& .PP .SH EXAMPLES .PP @@ -30,6 +41,22 @@ oddmu hashtags | head -n 11 .fi .RE .PP +See what kind of changes Oddmu would suggest: +.PP +.nf +.RS 4 +oddmu hashtags -update -dry-run +.fi +.RE +.PP +And then do it: +.PP +.nf +.RS 4 +oddmu hashtags -update +.fi +.RE +.PP .SH SEE ALSO .PP \fIoddmu\fR(1) diff --git a/man/oddmu-hashtags.1.txt 
b/man/oddmu-hashtags.1.txt index 06d7c80..efc412e 100644 --- a/man/oddmu-hashtags.1.txt +++ b/man/oddmu-hashtags.1.txt @@ -2,16 +2,27 @@ ODDMU-HASHTAGS(1) # NAME -oddmu-hashtags - count the hashtags used +oddmu-hashtags - work with hashtags # SYNOPSIS *oddmu hashtags* +*oddmu hashtags -update* [*-dry-run*] + # DESCRIPTION -The "hashtags" subcommand counts all the hashtags used and lists them, separated -by a TAB character. +By default, the "hashtags" subcommand counts all the hashtags used and lists +them, separated by a TAB character. + +With the *-update* flag, the hashtag pages are updated with links to all the blog +pages having the corresponding tag. This is only necessary when migrating a +collection of Markdown files. Ordinarily, Oddmu maintains the hashtag pages +automatically. When writing pages offline, use _oddmu-notify_(1) to update the +hashtag pages. + +Use the *-dry-run* flag to see what would change with the *-update* flag without +actually changing any files. # EXAMPLES @@ -21,6 +32,18 @@ List the top 10 hashtags. This requires 11 lines because of the header line. 
oddmu hashtags | head -n 11 ``` +See what kind of changes Oddmu would suggest: + +``` +oddmu hashtags -update -dry-run +``` + +And then do it: + +``` +oddmu hashtags -update +``` + # SEE ALSO _oddmu_(1) diff --git a/man/oddmu.1 b/man/oddmu.1 index 515afb6..9337342 100644 --- a/man/oddmu.1 +++ b/man/oddmu.1 @@ -5,7 +5,7 @@ .nh .ad l .\" Begin generated content: -.TH "ODDMU" "1" "2025-07-16" +.TH "ODDMU" "1" "2025-08-09" .PP .SH NAME .PP @@ -384,7 +384,7 @@ Oddmu running as a webserver: .PP .PD 0 .IP \(bu 4 -\fIoddmu-hashtags\fR(1), on how to count the hashtags used +\fIoddmu-hashtags\fR(1), on working with hashtags .IP \(bu 4 \fIoddmu-html\fR(1), on how to render a page .IP \(bu 4 diff --git a/man/oddmu.1.txt b/man/oddmu.1.txt index 9731a27..cb5af47 100644 --- a/man/oddmu.1.txt +++ b/man/oddmu.1.txt @@ -315,7 +315,7 @@ If you run Oddmu as a web server: If you run Oddmu as a static site generator or pages offline and sync them with Oddmu running as a webserver: -- _oddmu-hashtags_(1), on how to count the hashtags used +- _oddmu-hashtags_(1), on working with hashtags - _oddmu-html_(1), on how to render a page - _oddmu-list_(1), on how to list pages and titles - _oddmu-links_(1), on how to list the outgoing links for a page