1 Commits

Author SHA1 Message Date
Alex Schroeder
faf9198edd Add sitemap command and handler 2026-01-02 22:30:02 +01:00
13 changed files with 206 additions and 43 deletions

View File

@@ -110,6 +110,10 @@ Markdown pages from the command line.
This man page documents the "feed" subcommand to generate a feed from
Markdown pages from the command line.
[oddmu-sitemap(1)](https://alexschroeder.ch/view/oddmu/oddmu-sitemap.1):
This man page documents the "sitemap" subcommand to generate the
static sitemap from the command line.
[oddmu-static(1)](https://alexschroeder.ch/view/oddmu/oddmu-static.1):
This man page documents the "static" subcommand to generate an entire
static website from the command line, avoiding the need to run Oddmu
@@ -255,6 +259,7 @@ high-level introduction to the various source files.
- `preview.go` implements the `/preview` handler
- `score.go` implements the page scoring when showing search results
- `search.go` implements the `/search` handler
- `sitemap.go` implements the `/sitemap.xml` handler
- `snippets.go` implements the page summaries for search results
- `templates.go` implements template loading and reloading
- `tokenizer.go` implements the various tokenizers used

View File

@@ -40,7 +40,7 @@ ServerAdmin alex@alexschroeder.ch
<VirtualHost *:443>
ServerName transjovian.org
SSLEngine on
ProxyPassMatch "^/((view|preview|diff|edit|save|add|append|upload|drop|search|archive)/(.*))?$" \
ProxyPassMatch "^/((view|preview|diff|edit|save|add|append|upload|drop|search|archive)/(.*)|sitemap\.xml)?$" \
"http://localhost:8080/$1"
</VirtualHost>
```
@@ -106,13 +106,13 @@ ServerAdmin alex@alexschroeder.ch
ServerName transjovian.org
ProxyPassMatch "^/((view|diff|search|archive)/(.*))?$" \
"http://localhost:8080/$1"
RedirectMatch "^/((edit|save|add|append|upload|drop)/(.*))?$" \
RedirectMatch "^/((edit|save|add|append|upload|drop)/(.*)|sitemap\.xml)?$" \
"https://transjovian.org/$1"
</VirtualHost>
<VirtualHost *:443>
ServerName transjovian.org
SSLEngine on
ProxyPassMatch "^/((view|preview|diff|edit|save|add|append|upload|drop|search|archive)/(.*))?$" \
ProxyPassMatch "^/((view|preview|diff|edit|save|add|append|upload|drop|search|archive)/(.*)|sitemap\.xml)?$" \
"http://localhost:8080/$1"
</VirtualHost>
```
@@ -144,7 +144,7 @@ You probably want to serve some static files as well (see *Serve static files*).
In that case, you need to use the ProxyPassMatch directive.
```
ProxyPassMatch "^/((view|preview|diff|edit|save|add|append|upload|drop|search|archive)/(.*))?$" \
ProxyPassMatch "^/((view|preview|diff|edit|save|add|append|upload|drop|search|archive)/(.*)|sitemap\.xml)?$" \
"unix:/run/oddmu/oddmu.sock|http://localhost/$1"
```
@@ -159,7 +159,7 @@ A workaround is to add the redirect manually and drop the question-mark:
```
RedirectMatch "^/$" "/view/index"
ProxyPassMatch "^/((view|preview|diff|edit|save|add|append|upload|drop|search|archive)/(.*))$" \
ProxyPassMatch "^/((view|preview|diff|edit|save|add|append|upload|drop|search|archive)/(.*)|sitemap\.xml)$" \
"unix:/run/oddmu/oddmu.sock|http://localhost/$1"
```
@@ -213,8 +213,9 @@ The way Oddmu handles subdirectories is that all files and directories are
visible, except for "hidden" files and directories (whose name starts with a
period). Specifically, do not rely on Apache to hide locations in subdirectories
from public view. Search reveals the existence of these pages and produces an
extract, even if users cannot follow the links. Archive links pack all the
subdirectories, including locations you may have hidden from view using Apache.
extract, even if users cannot follow the links. The Sitemap lists all pages,
including subdirectories. Archive links pack all the subdirectories, including
locations you may have hidden from view using Apache.
If you want to treat subdirectories as separate sites, you need to set the
environment variable ODDMU_FILTER to a regular expression matching those

View File

@@ -6,13 +6,13 @@ oddmu-filter - keeping subdirectories separate
# DESCRIPTION
There are actions such as searching and archiving that act on multiple pages,
not just a single page. These actions walk the directory tree, including all
subdirectories. In some cases, this is not desirable.
There are actions such as producing the sitemap, searching and archiving that
act on multiple pages, not just a single page. These actions walk the directory
tree, including all subdirectories. In some cases, this is not desirable.
Sometimes, subdirectories are separate sites, like the sites of other projects
or different people. Depending on how you think about it, you might not want to
include those "sites" in searches or archives of the whole site.
include those "sites" in searches, sitemaps or archives of the whole site.
Since directory tree actions always start in the directory the visitor is
currently looking at, directory tree actions starting in a "separate site"

View File

@@ -19,7 +19,7 @@ The site is defined in "/etc/nginx/sites-available/default", in the _server_
section. Add a new _location_ section after the existing _location_ section:
```
location ~ ^/(view|preview|diff|edit|save|add|append|upload|drop|search|archive)/ {
location ~ ^/((view|preview|diff|edit|save|add|append|upload|drop|search|archive)/|sitemap\.xml$) {
proxy_pass http://localhost:8080;
}
```
@@ -81,7 +81,7 @@ server configuration. On a Debian system, that'd be in
"/etc/nginx/sites-available/default".
```
location ~ ^/(view|preview|diff|edit|save|add|append|upload|drop|search|archive)/ {
location ~ ^/((view|preview|diff|edit|save|add|append|upload|drop|search|archive)/|sitemap\.xml$) {
proxy_pass http://unix:/run/oddmu/oddmu.sock:;
}
```

38
man/oddmu-sitemap.1.txt Normal file
View File

@@ -0,0 +1,38 @@
ODDMU-SITEMAP(1)
# NAME
oddmu-sitemap - print static sitemap.xml
# SYNOPSIS
*oddmu sitemap* [-base URL] [-filter regexp]
# DESCRIPTION
The "sitemap" subcommand prints the list of all pages in Sitemap format. Oddmu
already serves the sitemap at the URL "/sitemap.xml" but if you'd prefer to
provide a static file, use this command and redirect the output to a file called
"sitemap.xml" in your document root at regular intervals.
If you do this, don't proxy the "/sitemap.xml" URL in the web server configuration.
Your "robots.txt" file, if you have one, should point at the sitemap you
provide.
# OPTIONS
*-base* _URL_
The base URL is something like "https://example.org/view/".
*-filter* _regexp_
A regular expression matching the pages to exclude from the sitemap.
This emulates the effect of the ODDMU_FILTER environment variable.
# SEE ALSO
_oddmu_(1), _oddmu-filter_(7), _oddmu-apache_(1), _oddmu-nginx_(1),
https://www.sitemaps.org/
# AUTHORS
Maintained by Alex Schroeder <alex@gnu.org>.

View File

@@ -18,9 +18,9 @@ placeholders.
- _diff.html_ uses a _page_
- _edit.html_ uses a _page_
- _feed.html_ uses a _feed_
- _list.html_ uses a _list_
- _preview.html_ uses a _page_
- _search.html_ uses a _search_
- _sitemap.html_ uses a _sitemap_
- _static.html_ uses a _page_
- _upload.html_ uses an _upload_
- _view.html_ uses a _page_
@@ -146,32 +146,6 @@ explanation. The next year is one lower than the year currently shown (if on a
year page) or the current year (if looking at the index). If it isn't set, its
value is 0.
## List
The list contains a directory name and an array of files.
_{{.Dir}}_ is the directory name that is being listed, percent-encoded.
_{{.Files}}_ is the array of files. To refer to them, you need to use a _{{range
.Files}}_ … _{{end}}_ construct.
Each file has the following attributes:
_{{.Name}}_ is the filename. The ".md" suffix for Markdown files is part of the
name (unlike page names).
_{{.Path}}_ is the page name, percent-encoded.
_{{.Title}}_ is the page title, if the file in question is a Markdown file.
_{{.IsDir}}_ is a boolean used to indicate that this file is a directory.
_{{.IsUp}}_ is a boolean used to indicate the entry for the parent directory
(the first file in the array, unless the directory being listed is the top
directory). The filename of this file is "..".
_{{.Date}}_ is the last modification date of the file.
## Search
_{{.Query}}_ is the query string.
@@ -210,6 +184,16 @@ _{{.Name}}_ is the file name for use in URLs.
_{{.Html}}_ the image alt-text with a bold tag used to highlight the first
search term that matched.
## Sitemap
The sitemap contains a list of URLs, each with its location:
_{{.URL}}_ is the list of URLs. To refer to them, you need to use a _{{range
.URL}}_ … _{{end}}_ construct.
Each URL has the following attribute:
_{{.Loc}}_ is the actual page URL.
## Upload
_{{.Dir}}_ is the directory where the uploaded file ends up, based on the URL

View File

@@ -56,6 +56,7 @@ directory:
- _/upload/dir/name_ shows a form to upload a file
- _/drop/dir/name_ saves an upload
- _/search/dir/?q=term_ to search for a term
- _/sitemap.xml_ to list the links to all the pages
- _/archive/dir/name.zip_ to download a zip file of a directory
When calling the _save_ and _append_ action, the page name is taken from the URL
@@ -324,6 +325,7 @@ Oddmu running as a webserver:
- _oddmu-notify_(1), on updating index, changes and hashtag pages
- _oddmu-replace_(1), on how to search and replace text
- _oddmu-search_(1), on how to run a search
- _oddmu-sitemap_(1), on generating a static sitemap.xml
- _oddmu-static_(1), on generating a static site
- _oddmu-toc_(1), on how to list the table of contents (toc) of a page
- _oddmu-version_(1), on how to get all the build information from the binary

48
sitemap.go Normal file
View File

@@ -0,0 +1,48 @@
package main
import (
"log"
"net/http"
"os"
"regexp"
)
// The types below are the data handed to the "sitemap.html" template.
type (
	// SitemapURL is a single entry of the sitemap.
	SitemapURL struct {
		// Loc is the absolute URL of the page, available as {{.Loc}} in the template.
		Loc string
	}

	// Sitemap is the list of all the entries, available as {{.URL}} in the template.
	Sitemap struct {
		URL []*SitemapURL
	}
)
// sitemapHandler lists all the pages in Sitemap format. See https://www.sitemaps.org/protocol.html for more. It
// only answers requests for "/sitemap.xml"; every other path results in a 404. It takes the ODDMU_FILTER
// environment variable into account.
//
// NOTE(review): serve() registers this handler for the pattern "/sitemap". A ServeMux pattern without a trailing
// slash matches that exact path only, so requests for "/sitemap.xml" might never get here – confirm that the
// registration uses "/sitemap.xml".
func sitemapHandler(w http.ResponseWriter, r *http.Request) {
	if r.URL.Path != "/sitemap.xml" {
		http.NotFound(w, r)
		return
	}
	base := sitemapBase(r)
	filter := os.Getenv("ODDMU_FILTER")
	w.Write([]byte(`<?xml version="1.0" encoding="UTF-8"?>`))
	renderTemplate(w, ".", "sitemap", sitemap(&index, base, filter))
}

// sitemapBase returns the base URL for page links, e.g. "https://example.org/view/". For requests received by a
// server, r.URL usually holds just the path and the query, so scheme and host must come from elsewhere: the
// X-Forwarded-Proto and X-Forwarded-Host headers if a reverse proxy set them, the TLS state of the connection and
// the Host header otherwise.
func sitemapBase(r *http.Request) string {
	// first returns the first element of a comma-separated header value, as added by a chain of proxies.
	first := func(v string) string {
		for i := 0; i < len(v); i++ {
			if v[i] == ',' {
				return v[:i]
			}
		}
		return v
	}
	scheme := r.URL.Scheme
	if scheme == "" {
		scheme = first(r.Header.Get("X-Forwarded-Proto"))
	}
	if scheme == "" {
		if r.TLS != nil {
			scheme = "https"
		} else {
			scheme = "http"
		}
	}
	host := r.URL.Host
	if host == "" {
		host = first(r.Header.Get("X-Forwarded-Host"))
	}
	if host == "" {
		host = r.Host
	}
	return scheme + "://" + host + "/view/"
}
// sitemap generates the list of URLs by prefixing every page name in the index with base. A reference to the index
// needs to be provided to make it easier to write tests. Pages whose name matches the regular expression filter
// are excluded. An empty filter excludes nothing: it must not be compiled, because the empty regular expression
// matches every name and would therefore exclude every page. If the filter does not compile, the problem is
// logged and the result is an empty sitemap. The order of the URLs is the iteration order of the map of titles
// and therefore not stable between calls.
func sitemap(idx *indexStore, base, filter string) Sitemap {
	var re *regexp.Regexp
	if filter != "" {
		var err error
		re, err = regexp.Compile(filter)
		if err != nil {
			log.Println("ODDMU_FILTER does not compile:", filter, err)
			return Sitemap{URL: make([]*SitemapURL, 0)}
		}
	}
	idx.RLock()
	defer idx.RUnlock()
	// The list is never nil, even if no page ends up in it.
	url := make([]*SitemapURL, 0, len(idx.titles))
	for name := range idx.titles {
		if re == nil || !re.MatchString(name) {
			url = append(url, &SitemapURL{Loc: base + name})
		}
	}
	return Sitemap{URL: url}
}

3
sitemap.html Normal file
View File

@@ -0,0 +1,3 @@
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
{{range .URL}}<url><loc>{{.Loc}}</loc></url>
{{end}}</urlset>

62
sitemap_cmd.go Normal file
View File

@@ -0,0 +1,62 @@
package main
import (
"context"
"fmt"
"flag"
"io"
"log"
"os"
"github.com/google/subcommands"
)
// sitemapCmd is the "sitemap" subcommand. Its fields hold the values of the command line flags.
type sitemapCmd struct {
	base, filter string
}

// Name returns the name of the subcommand.
func (*sitemapCmd) Name() string {
	return "sitemap"
}

// Synopsis returns the one-line summary of the subcommand.
func (*sitemapCmd) Synopsis() string {
	return "list all the pages known in Sitemap format"
}

// Usage returns the help text of the subcommand.
func (*sitemapCmd) Usage() string {
	const usage = `sitemap [-base URL] [-filter regex]:
Print all the pages known in Sitemap format.
See https://www.sitemaps.org/ for more.
`
	return usage
}

// SetFlags registers the flags "base" (defaulting to the view URL on localhost, port 8080) and "filter"
// (defaulting to the empty string) with the flag set.
func (cmd *sitemapCmd) SetFlags(fs *flag.FlagSet) {
	fs.StringVar(&cmd.filter, "filter", "", "a regular expression to filter pages")
	fs.StringVar(&cmd.base, "base", "http://localhost:8080/view/", "the base URL for the sitemap")
}
// Execute loads the index, reports the number of pages indexed on stderr and prints the sitemap to stdout, using
// the base URL and the filter given on the command line. If loading the index fails, the error is printed on stderr
// and the command fails.
func (cmd *sitemapCmd) Execute(_ context.Context, f *flag.FlagSet, _ ...interface{}) subcommands.ExitStatus {
	count, err := index.load()
	if err == nil {
		fmt.Fprintf(os.Stderr, "Indexed %d pages\n", count)
		return sitemapCli(os.Stdout, &index, cmd.base, cmd.filter)
	}
	fmt.Fprintf(os.Stderr, "Index load: %s\n", err)
	return subcommands.ExitFailure
}
// sitemapCli implements the printing of a Sitemap: the XML declaration followed by the output of the
// "sitemap.html" template. In order to make testing easier, it takes a Writer and an indexStore. The Writer is
// important so that test code can provide a buffer instead of os.Stdout; the indexStore is important so that test
// code can ensure no other test running in parallel can interfere with the list of known pages (by adding or
// deleting pages). The result is a failure if the template is missing, if the XML declaration cannot be written or
// if executing the template fails; each of these is logged.
func sitemapCli(w io.Writer, idx *indexStore, base, filter string) subcommands.ExitStatus {
	loadTemplates()
	const name = "sitemap.html"
	t := templates.template[name]
	if t == nil {
		log.Println("Template not found:", name)
		return subcommands.ExitFailure
	}
	// Check the first write, too: a writer that is already broken must not result in a successful exit.
	if _, err := io.WriteString(w, `<?xml version="1.0" encoding="UTF-8"?>`+"\n"); err != nil {
		log.Println(err)
		return subcommands.ExitFailure
	}
	if err := t.Execute(w, sitemap(idx, base, filter)); err != nil {
		log.Println(err)
		return subcommands.ExitFailure
	}
	return subcommands.ExitSuccess
}

18
sitemap_cmd_test.go Normal file
View File

@@ -0,0 +1,18 @@
package main
import (
"bytes"
"testing"
"github.com/google/subcommands"
"github.com/stretchr/testify/assert"
)
// TestSitemapCmd prints the sitemap of the minimal index into a buffer, excluding everything in the themes
// directory, and checks which page URLs made it into the output.
func TestSitemapCmd(t *testing.T) {
	var out bytes.Buffer
	status := sitemapCli(&out, minimalIndex(t), "https://example.org/view/", "^themes/")
	assert.Equal(t, subcommands.ExitSuccess, status)
	xml := out.String()
	for _, want := range []string{
		"https://example.org/view/index",
		"https://example.org/view/README",
	} {
		assert.Contains(t, xml, want)
	}
	assert.NotContains(t, xml, "https://example.org/view/themes/")
}

View File

@@ -11,11 +11,11 @@ import (
"sync"
)
// templateFiles are the various HTML template files used. These files must exist in the root directory for Oddmu to be
// able to generate HTML output. This always requires a template.
// templateFiles are the various HTML template files used. These files must exist in the root directory for Oddmu
// to be able to generate HTML output. This always requires a template.
var templateFiles = []string{"edit.html", "add.html", "view.html", "preview.html",
"diff.html", "search.html", "static.html", "upload.html", "feed.html",
"list.html"}
"sitemap.html"}
// templateStore controls access to map of parsed HTML templates. Make sure to lock and unlock as appropriate. See
// renderTemplate and loadTemplates.

View File

@@ -208,6 +208,7 @@ func serve() {
mux.HandleFunc("/upload/", makeHandler(uploadHandler, false, http.MethodGet))
mux.HandleFunc("/drop/", makeHandler(dropHandler, false, http.MethodPost))
mux.HandleFunc("/search/", makeHandler(searchHandler, false, http.MethodGet, http.MethodPost))
mux.HandleFunc("/sitemap", sitemapHandler)
srv := &http.Server{
ReadTimeout: 2 * time.Minute,
WriteTimeout: 5 * time.Minute,
@@ -239,6 +240,7 @@ func commands() {
subcommands.Register(&staticCmd{}, "")
subcommands.Register(&tocCmd{}, "")
subcommands.Register(&versionCmd{}, "")
subcommands.Register(&sitemapCmd{}, "")
flag.Parse()
ctx := context.Background()