Add -update and -dry-run options to hashtags command

This commit is contained in:
Alex Schroeder
2025-08-09 18:18:11 +02:00
parent 7c5a3860e7
commit cd6809d791
5 changed files with 209 additions and 10 deletions

View File

@@ -5,15 +5,26 @@ import (
"flag"
"fmt"
"github.com/google/subcommands"
"github.com/gomarkdown/markdown"
"github.com/gomarkdown/markdown/ast"
"github.com/hexops/gotextdiff"
"github.com/hexops/gotextdiff/myers"
"github.com/hexops/gotextdiff/span"
"io"
"os"
"regexp"
"sort"
"strings"
)
// hashtagsCmd is the "hashtags" subcommand. Its fields hold the values of the -update and -dry-run command-line
// flags.
type hashtagsCmd struct {
	update, dryRun bool
}
// SetFlags registers the -update and -dry-run boolean flags on f and binds them to the fields of cmd. Both default
// to false.
func (cmd *hashtagsCmd) SetFlags(f *flag.FlagSet) {
	f.BoolVar(&cmd.dryRun, "dry-run", false, "only report the changes it would make")
	f.BoolVar(&cmd.update, "update", false, "create and update hashtag pages")
}
// Name returns the name of the subcommand: "hashtags".
func (*hashtagsCmd) Name() string {
	return "hashtags"
}
@@ -25,6 +36,9 @@ func (*hashtagsCmd) Usage() string {
}
// Execute runs the subcommand and writes to standard output. Without the -update flag it delegates to hashtagsCli;
// with it, it delegates to hashtagsUpdateCli, passing along the -dry-run flag. The status of the delegate is
// returned unchanged.
func (cmd *hashtagsCmd) Execute(_ context.Context, f *flag.FlagSet, _ ...interface{}) subcommands.ExitStatus {
	if !cmd.update {
		return hashtagsCli(os.Stdout)
	}
	return hashtagsUpdateCli(os.Stdout, cmd.dryRun)
}
@@ -57,3 +71,138 @@ func hashtagsCli(w io.Writer) subcommands.ExitStatus {
return subcommands.ExitSuccess
}
// hashtagsUpdateCli runs the hashtags command on the command line and creates and updates the hashtag pages in the
// current directory. That is, pages in subdirectories are skipped! It is used here with an io.Writer for easy testing.
//
// Only hashtags used by more than five documents in the index are considered. For each of them, the hashtag page is
// loaded (or a fresh one is started) and a link is added for every blog page in the current directory. With dryRun
// set, nothing is saved; instead, skipped hashtags and a unified diff for every page that would be created or
// updated are written to w. It returns subcommands.ExitFailure as soon as a page cannot be loaded or saved, and
// subcommands.ExitSuccess otherwise.
func hashtagsUpdateCli(w io.Writer, dryRun bool) subcommands.ExitStatus {
	index.load()
	// no locking necessary since this is for the command-line
	namesMap := make(map[string]string)
	for hashtag, docids := range index.token {
		if len(docids) <= 5 {
			if dryRun {
				fmt.Fprintf(w, "Skipping #%s because there are not enough entries (%d)\n", hashtag, len(docids))
			}
			continue
		}
		title, ok := namesMap[hashtag]
		if !ok {
			title = hashtagName(namesMap, hashtag, docids)
			namesMap[hashtag] = title
		}
		// hashtagName returns the empty string if it found no spelling (e.g. the hashtag is only used by pages
		// in subdirectories); without a name, the page would end up being saved as ".md"
		if title == "" {
			if dryRun {
				fmt.Fprintf(w, "Skipping #%s because no page name could be determined\n", hashtag)
			}
			continue
		}
		pageName := strings.ReplaceAll(title, " ", "_")
		h, err := loadPage(pageName)
		original := ""
		isNew := err != nil
		if isNew {
			h = &Page{Name: pageName, Body: []byte("# " + title + "\n\n#" + pageName + "\n\nBlog posts:\n\n")}
		} else {
			original = string(h.Body)
		}
		for _, id := range docids {
			name := index.documents[id]
			if strings.Contains(name, "/") {
				continue
			}
			p, err := loadPage(name)
			if err != nil {
				fmt.Fprintf(w, "Loading %s: %s\n", name, err)
				return subcommands.ExitFailure
			}
			if !p.IsBlog() {
				continue
			}
			p.handleTitle(false)
			if p.Title == "" {
				p.Title = p.Name
			}
			esc := nameEscape(p.Base())
			link := "* [" + p.Title + "](" + esc + ")\n"
			// The escaped name is matched literally: quote it so that characters such as "." or "+" in a page
			// name are not interpreted as regular expression operators (which could even make MustCompile
			// panic). QuoteMeta leaves "&" alone, so the alternation can be substituted afterwards.
			// I guess & used to get escaped and now no longer does
			target := strings.ReplaceAll(regexp.QuoteMeta(esc), "&", "(&|%26)")
			re := regexp.MustCompile(`(?m)^\* \[[^\]]+\]\(` + target + `\)\n`)
			addLinkToPage(h, link, re)
		}
		// only save if something changed
		if string(h.Body) == original {
			continue
		}
		if dryRun {
			if isNew {
				fmt.Fprintf(w, "Creating %s.md\n", title)
			} else {
				fmt.Fprintf(w, "Updating %s.md\n", title)
			}
			fn := h.Name + ".md"
			edits := myers.ComputeEdits(span.URIFromPath(fn), original, string(h.Body))
			diff := fmt.Sprint(gotextdiff.ToUnified(fn+"~", fn, original, edits))
			fmt.Fprint(w, diff)
			continue
		}
		if err := h.save(); err != nil {
			fmt.Fprintf(w, "Saving hashtag %s failed: %s\n", hashtag, err)
			return subcommands.ExitFailure
		}
	}
	return subcommands.ExitSuccess
}
// hashtagName determines the most likely capitalization of hashtag. It goes through all the documents in the same
// directory (documents in subdirectories and documents that cannot be loaded are skipped), parses them and looks at
// the links carrying the attribute class="tag". Every link text that equals hashtag, ignoring case and treating
// spaces as underscores, counts as a vote for that spelling. As soon as one spelling has five or more votes, it is
// returned; otherwise the spelling with the most votes after all documents wins. Ties are broken in favour of the
// spelling that sorts first, so the result does not depend on map iteration order. If no spelling is found, the
// empty string is returned.
//
// The namesMap parameter is currently not used.
func hashtagName(namesMap map[string]string, hashtag string, docids []docid) string {
	candidate := make(map[string]int)
	var mostPopular string
	for _, id := range docids {
		name := index.documents[id]
		if strings.Contains(name, "/") {
			continue
		}
		p, err := loadPage(name)
		if err != nil {
			continue
		}
		// parsing finds all the hashtags
		parser, _ := wikiParser()
		doc := markdown.Parse(p.Body, parser)
		ast.WalkFunc(doc, func(node ast.Node, entering bool) ast.WalkStatus {
			if !entering {
				return ast.GoToNext
			}
			link, ok := node.(*ast.Link)
			if !ok {
				return ast.GoToNext
			}
			for _, attr := range link.AdditionalAttributes {
				if attr != `class="tag"` {
					continue
				}
				// collect the text of all the leaf nodes below the link
				var text []byte
				ast.WalkFunc(link, func(child ast.Node, entering bool) ast.WalkStatus {
					if entering {
						if leaf := child.AsLeaf(); leaf != nil {
							text = append(text, leaf.Literal...)
						}
					}
					return ast.GoToNext
				})
				// a tag link without any text has no name to offer (and slicing it below would panic)
				if len(text) == 0 {
					continue
				}
				// drop the first byte, presumably the leading "#" of the hashtag
				tag := string(text[1:])
				if strings.EqualFold(hashtag, strings.ReplaceAll(tag, " ", "_")) {
					candidate[tag]++
				}
			}
			return ast.GoToNext
		})
		best, count := "", 0
		for key, val := range candidate {
			if val > count || (val == count && key < best) {
				best, count = key, val
			}
		}
		mostPopular = best
		// shortcut
		if count >= 5 {
			return mostPopular
		}
	}
	return mostPopular
}

View File

@@ -5,20 +5,31 @@
.nh
.ad l
.\" Begin generated content:
.TH "ODDMU-HASHTAGS" "1" "2024-08-29"
.TH "ODDMU-HASHTAGS" "1" "2025-08-09"
.PP
.SH NAME
.PP
oddmu-hashtags - count the hashtags used
oddmu-hashtags - work with hashtags
.PP
.SH SYNOPSIS
.PP
\fBoddmu hashtags\fR
.PP
\fBoddmu hashtags -update\fR [\fB-dry-run\fR]
.PP
.SH DESCRIPTION
.PP
The "hashtags" subcommand counts all the hashtags used and lists them, separated
by a TAB character.\&
By default, the "hashtags" subcommand counts all the hashtags used and lists
them, separated by a TAB character.\&
.PP
With the \fB-update\fR flag, the hashtag pages are updated with links to all the blog
pages having the corresponding tag.\& This is only necessary when migrating a
collection of Markdown files.\& Ordinarily, Oddmu maintains the hashtag pages
automatically.\& When writing pages offline, use \fIoddmu-notify\fR(1) to update the
hashtag pages.\&
.PP
Use the \fB-dry-run\fR flag to see what would change with the \fB-update\fR flag without
actually changing any files.\&
.PP
.SH EXAMPLES
.PP
@@ -30,6 +41,22 @@ oddmu hashtags | head -n 11
.fi
.RE
.PP
See what kind of changes Oddmu would suggest:
.PP
.nf
.RS 4
oddmu hashtags -update -dry-run
.fi
.RE
.PP
And then do it:
.PP
.nf
.RS 4
oddmu hashtags -update
.fi
.RE
.PP
.SH SEE ALSO
.PP
\fIoddmu\fR(1)

View File

@@ -2,16 +2,27 @@ ODDMU-HASHTAGS(1)
# NAME
oddmu-hashtags - count the hashtags used
oddmu-hashtags - work with hashtags
# SYNOPSIS
*oddmu hashtags*
*oddmu hashtags -update* [*-dry-run*]
# DESCRIPTION
The "hashtags" subcommand counts all the hashtags used and lists them, separated
by a TAB character.
By default, the "hashtags" subcommand counts all the hashtags used and lists
them, separated by a TAB character.
With the *-update* flag, the hashtag pages are updated with links to all the blog
pages having the corresponding tag. This is only necessary when migrating a
collection of Markdown files. Ordinarily, Oddmu maintains the hashtag pages
automatically. When writing pages offline, use _oddmu-notify_(1) to update the
hashtag pages.
Use the *-dry-run* flag to see what would change with the *-update* flag without
actually changing any files.
# EXAMPLES
@@ -21,6 +32,18 @@ List the top 10 hashtags. This requires 11 lines because of the header line.
oddmu hashtags | head -n 11
```
See what kind of changes Oddmu would suggest:
```
oddmu hashtags -update -dry-run
```
And then do it:
```
oddmu hashtags -update
```
# SEE ALSO
_oddmu_(1)

View File

@@ -5,7 +5,7 @@
.nh
.ad l
.\" Begin generated content:
.TH "ODDMU" "1" "2025-07-16"
.TH "ODDMU" "1" "2025-08-09"
.PP
.SH NAME
.PP
@@ -384,7 +384,7 @@ Oddmu running as a webserver:
.PP
.PD 0
.IP \(bu 4
\fIoddmu-hashtags\fR(1), on how to count the hashtags used
\fIoddmu-hashtags\fR(1), on working with hashtags
.IP \(bu 4
\fIoddmu-html\fR(1), on how to render a page
.IP \(bu 4

View File

@@ -315,7 +315,7 @@ If you run Oddmu as a web server:
If you run Oddmu as a static site generator or write pages offline and sync them with
Oddmu running as a webserver:
- _oddmu-hashtags_(1), on how to count the hashtags used
- _oddmu-hashtags_(1), on working with hashtags
- _oddmu-html_(1), on how to render a page
- _oddmu-list_(1), on how to list pages and titles
- _oddmu-links_(1), on how to list the outgoing links for a page