2022-04-16 - gmi to html With awk
Just knocked up (really mostly borrowed) a quick solution to mirror my gemlog to my https site.
Several people seem to go from some other source to both '.gmi' and '.html', or run a proxy. As my web site is on a slow machine a conversion is better, but I want '.gmi' to be the primary format. Sounded like a job for an awk script, and a bash script, so I searched and found:
https://gist.github.com/dracometallium/bf70ae09b4dd9a857d33e93daa2810c4
Thanks to dracometallium for posting that. I adapted it a bit, and then wrote a simple bash wrapper script. The wrapper script copies everything from my 'gemlog' directory of my capsule to the 'blog' directory of my site and does the conversions:
#!/usr/bin/env bash
#
# Mirror the gemlog in SOURCE to the web directory TARGET, then convert every
# '.gmi' file in the copy to '.html' with ./convert.awk and delete the '.gmi'.
#
# SOURCE and ./convert.awk are both relative to the current directory, so run
# this from the gemlog directory. TARGET is deleted and rebuilt on every run.
set -euo pipefail

SOURCE="."
TARGET="/home/dave/html/blog"

echo "Gemlog to Weblog conversion"
echo
echo "With thanks to dracometallium on GitHub for their AWK script!"
echo

# Check the converter is usable *before* destroying the existing target, so a
# broken setup cannot leave the site without a blog directory.
if ! command -v gawk >/dev/null; then
  echo "gawk is required but was not found in PATH" >&2
  exit 1
fi
if [[ ! -f ./convert.awk ]]; then
  echo "convert.awk not found in the current directory" >&2
  exit 1
fi

echo "Removing old target dir: ${TARGET}"
# ':?' aborts instead of expanding to an empty path if TARGET is ever blank.
rm -rf -- "${TARGET:?}"
echo "Creating new target dir: ${TARGET}"
mkdir -p -- "${TARGET}"
# The glob is left outside the quotes on purpose; dotfiles are not copied.
cp -a -- "${SOURCE}"/* "${TARGET}/"

# NUL-delimited find output keeps paths containing spaces or glob characters
# intact (a plain $(find ...) would word-split them).
while IFS= read -r -d '' g; do
  h="${g%.gmi}.html"
  echo "Converting ${g} to HTML"
  # Under 'set -e' a failed conversion stops the script here, so the '.gmi'
  # copy is only removed once its '.html' has been written.
  gawk -f ./convert.awk "${g}" > "${h}"
  rm -- "${g}"
done < <(find "${TARGET}" ! -type d -name '*.gmi' -print0)

echo
echo "Done"
echo
The awk script that I borrowed and adapted is also below, for posterity:
#!/usr/bin/gawk -f
# Convert one gemtext (.gmi) file to a standalone HTML page on stdout.
#
# Adapted from:
# https://gist.github.com/dracometallium/bf70ae09b4dd9a857d33e93daa2810c4

# Runs once before any input is read: emits the fixed page header (document
# head, navigation bar, and the note pointing back at the original gemlog)
# and initialises the state shared by the line rules.
BEGIN {
    print "<!DOCTYPE html>"
    print "<html lang=\"en\">"
    print " <head>"
    print " <meta charset=\"utf-8\">"
    print " <title>dctrud's gem^H^H^H blog</title>"
    print " <link rel=\"stylesheet\" type=\"text/css\" href=\"/static/org-css/stylesheet.css\" />"
    print " </head>"
    print " <body>"
    print " <div id=\"preamble\" class=\"status\">"
    # The navigation paragraph is closed explicitly.
    print " <div><center><p>[ <a href=\"/index.html\">Home</a> | <a href=\"/about.html\">About Me</a> | <a href=\"/blog/\">Blog</a> ]</p></center></div>"
    print " </div>"
    print " <div class=\"content\">"
    print " <center>"
    print " <p><em>This is an html conversion of a <a href=\"http://gemini.circumlunar.space\">Gemini</a>"
    # This line ends the paragraph with </p>; an opening <p> here would add
    # an empty paragraph to the page.
    print " capsule (site).</em></p>"
    print " <p><em>You can view the original gemlog at:<br/>"
    print " <a href=\"gemini://dctrud.randomroad.net/gemlog\">gemini://dctrud.randomroad.net/gemlog</a></em></p>"
    print " </center>"

    # Control variables
    pre = 0     # 1 while inside a ``` preformatted block
    list = 0    # 1 while a <ul> is open
}
# Per-line rules. They are tried top to bottom for every input line and most
# end with 'next', so their order is significant.

# First we escape the HTML metacharacters: & becomes &amp;, < becomes &lt; and
# > becomes &gt;. '&' has to go first so the entities produced for < and > are
# not escaped a second time. In a gsub() replacement a bare & means "the
# matched text", hence the "\\&" to get a literal ampersand.
# After this rule a gemtext '>' is seen as '&gt;', so the link (=>) and quote
# (>) rules below match on the escaped form; that is easier than escaping
# separately inside every rule.
{
    gsub(/&/, "\\&amp;")
    gsub(/</, "\\&lt;")
    gsub(/>/, "\\&gt;")
}

# Opening ``` fence: start a preformatted block. Anything after the fence on
# the same line is dropped.
/^```/ && (pre == 0) {
    # We must close the list!
    if (list == 1) {
        list = 0
        print " </ul>"
    }
    pre = 1
    print " <pre><code>"
    next
}

# Closing ``` fence.
/^```/ && (pre == 1) {
    pre = 0
    print " </code></pre>"
    next
}

# Inside a preformatted block lines are passed through (already escaped)
# and no other rule is applied.
(pre == 1) {
    print $0
    next
}

# List item. The pattern is anchored to the start of the line so that a
# paragraph which merely contains "* " is not turned into a list item.
/^\* / {
    if (list == 0) {
        list = 1
        print " <ul>"
    }
    sub(/^\* [ \t]*/, "")
    print " <li>"$0"</li>"
    next
}

# If the list has ended, close it. There is no 'next' here: the current line
# still has to be handled by one of the rules below.
(list == 1) {
    list = 0
    print " </ul>"
}

# A line of three or more dashes becomes a horizontal rule.
/^---[-]*[ \t]*$/ {
    print " <hr/>"
    next
}

# Blank lines produce no output.
/^[ \t]*$/ {
    next
}

# Headings, most specific first so '###' is not caught by the '#' rule.
/^###/ {
    sub(/^#[#]*[ \t]*/, "")
    print " <h3>"$0"</h3>"
    next
}

/^##/ {
    sub(/^#[#]*[ \t]*/, "")
    print " <h2>"$0"</h2>"
    next
}

/^#/ {
    sub(/^#[#]*[ \t]*/, "")
    print " <h1>"$0"</h1>"
    next
}

# Quote line ('>' in the gemtext, '&gt;' after escaping).
/^&gt;/ {
    sub(/^&gt;[ \t]*/, "")
    print " <blockquote>"$0"</blockquote>"
    next
}

# Link line ('=> URL [text]' in the gemtext, '=&gt; ...' after escaping).
/^=&gt;/ {
    sub(/^=&gt;[ \t]*/, "")

    # The URL is everything up to the first blank ...
    url = $0
    sub(/[ \t].*$/, "", url)

    # ... and the link text is whatever follows, trimmed at both ends.
    text = $0
    sub(/^[^ \t]*/, "", text)
    sub(/^[ \t]*/, "", text)
    sub(/[ \t]*$/, "", text)

    # If it's a local gemini file (no scheme), link to the html instead:
    if (url !~ /^[a-zA-Z]*:\/\//) {
        sub(/\.(gmi|gemini)$/, ".html", url)
    }

    if (text == "") {
        text = url
    }

    # The URL is emitted inside a double-quoted attribute, so a literal
    # double quote in it has to be escaped as well.
    gsub(/"/, "\\&quot;", url)

    print " <p><a href=\""url"\">"text"</a></p>"

    # If it's an image, put an inline small version under the link
    if ((url ~ /\.png/) || (url ~ /\.jpg/) || (url ~ /\.jpeg/)) {
        print " <figure>"
        print " <img src=\""url"\" style=\"max-width: 400px\">"
        print " </figure>"
    }
    next
}

# Anything not claimed by a rule above is an ordinary paragraph.
{
    print " <p>"$0"</p>"
}
# Runs once after the last input line: closes whatever is still open and
# emits the page footer.
END {
    # Closes a preformatted block whose closing ``` fence is missing, so the
    # rest of the page is not swallowed by <pre>.
    if (pre == 1) {
        print " </code></pre>"
    }
    # Closes open list
    if (list == 1) {
        print " </ul>"
    }
    print " </div>"
    print " </body>"
    print "</html>"
}