dctrud's Random Road

Occasional unimportant nonsense.

2022-04-16 - gmi to html With awk

Just knocked up (really mostly borrowed) a quick solution to mirror my gemlog to my https site.

https://randomroad.net/blog

Several people seem to generate both '.gmi' and '.html' from some other source format, or run a proxy. As my web site is on a slow machine, a conversion is better than a proxy, but I want '.gmi' to be the primary format. That sounded like a job for an awk script and a bash script, so I searched and found:

https://gist.github.com/dracometallium/bf70ae09b4dd9a857d33e93daa2810c4

Thanks to dracometallium for posting that. I adapted it a bit, and then wrote a simple bash wrapper script. The wrapper script copies everything from my 'gemlog' directory of my capsule to the 'blog' directory of my site and does the conversions:


#!/usr/bin/env bash
#
# Gemlog to weblog conversion.
#
# Copies everything in SOURCE into a freshly created TARGET directory, then
# replaces each copied '.gmi' file with an '.html' rendering produced by the
# gawk script CONVERTER.
#
# Run it from the gemlog directory: SOURCE and CONVERTER are both relative to
# the current working directory.
#
# WARNING: TARGET is deleted and rebuilt from scratch on every run.

set -euo pipefail

readonly SOURCE="."
readonly TARGET="/home/dave/html/blog"
readonly CONVERTER="./convert.awk"

# Print an error message to stderr and abort.
die() {
	printf 'error: %s\n' "$*" >&2
	exit 1
}

echo "Gemlog to Weblog conversion"
echo
echo "With thanks to dracometallium on GitHub for their AWK script!"
echo

# Check everything we need *before* deleting the published copy, so that a
# missing tool or an empty source directory cannot leave the site wiped.
command -v gawk > /dev/null || die "gawk is not installed"
[[ -r "${CONVERTER}" ]] || die "cannot read ${CONVERTER}"

# Collect the entries to publish (dotfiles are deliberately not included).
shopt -s nullglob
entries=("${SOURCE}"/*)
shopt -u nullglob
((${#entries[@]} > 0)) || die "nothing to publish in ${SOURCE}"

echo "Removing old target dir: ${TARGET}"
# ':?' aborts instead of expanding to nothing should TARGET ever be empty.
rm -rf -- "${TARGET:?}"

echo "Creating new target dir: ${TARGET}"
mkdir -p -- "${TARGET}"
cp -a -- "${entries[@]}" "${TARGET}"

# NUL-delimited find output keeps file names that contain whitespace intact.
while IFS= read -r -d '' gmi; do
	html="${gmi%.gmi}.html"
	echo "Converting ${gmi} to HTML"
	# Only drop the gemtext copy once its HTML rendering was written; a
	# failed conversion must not leave an empty page behind.
	if ! gawk -f "${CONVERTER}" "${gmi}" > "${html}"; then
		rm -f -- "${html}"
		die "conversion failed for ${gmi}"
	fi
	rm -- "${gmi}"
done < <(find "${TARGET}" -type f -name '*.gmi' -print0)

echo
echo "Done"
echo
      

The awk script that I borrowed and adapted is also below, for posterity:


#!/usr/bin/gawk -f

# Adapted from:
#  https://gist.github.com/dracometallium/bf70ae09b4dd9a857d33e93daa2810c4

# Runs once before any input is read: emits the fixed HTML page header
# (doctype, <head>, navigation bar and the "this is a conversion" notice)
# and initialises the parser state used by the per-line rules.
BEGIN{
    # Printing header!
    print "<!DOCTYPE html>"
    print "<html lang=\"en\">"
    print "  <head>"
    print "  <meta charset=\"utf-8\">"
    print "    <title>dctrud's gem^H^H^H blog</title>"
    print "    <link rel=\"stylesheet\" type=\"text/css\" href=\"/static/org-css/stylesheet.css\" />"
    print "  </head>"
    print "  <body>"
    print "    <div id=\"preamble\" class=\"status\">"
    print "    <div><center><p>[ <a href=\"/index.html\">Home</a> | <a href=\"/about.html\">About Me</a> | <a href=\"/blog/\">Blog</a> ]</center></div>"
    print "    </div>"
    print "    <div class=\"content\">"
    print "      <center>"
    print "        <p><em>This is an html conversion of a <a href=\"http://gemini.circumlunar.space\">Gemini</a>"
    # This paragraph is closed with </p>; an opening <p> here would create a
    # stray empty paragraph before the next one.
    print "          capsule (site).</em></p>"
    print "        <p><em>You can view the original gemlog at:<br/>"
    print "           <a href=\"gemini://dctrud.randomroad.net/gemlog\">gemini://dctrud.randomroad.net/gemlog</a></em></p>"
    print "      </center>"
    # Control variables
    pre = 0     # 1 while inside a ``` preformatted block
    list = 0    # 1 while a <ul> is open
}

# Escape the characters that are special in HTML so gemtext content is shown
# literally, both in element text and inside attribute values.
# "&" must be handled first, otherwise the entities produced for the other
# characters would themselves be escaped again.
function html_escape(s) {
    # In a gsub() replacement a bare "&" stands for the matched text; the
    # string "\\&" reaches gsub() as \& and yields a literal ampersand.
    gsub(/&/, "\\&amp;", s)
    gsub(/</, "\\&lt;", s)
    gsub(/>/, "\\&gt;", s)
    gsub(/"/, "\\&quot;", s)
    return s
}

# Render the body of a link line (everything after the "=>" marker, already
# HTML-escaped) as HTML. The first whitespace-delimited word is the URL, the
# remainder is the label. Returns one or more output lines joined by "\n".
function link_html(body,    url, label, html) {
    sub(/^[ \t]+/, "", body)

    url = body
    sub(/[ \t].*$/, "", url)

    label = body
    sub(/^[^ \t]*/, "", label)
    gsub(/^[ \t]+|[ \t]+$/, "", label)

    # If it's a local gemini file (no scheme://), link to the html instead.
    if (url !~ /^[a-zA-Z]*:\/\//) {
        sub(/\.(gmi|gemini)$/, ".html", url)
    }

    if (label == "") {
        label = url
    }

    html = "      <p><a href=\"" url "\">" label "</a></p>"

    # If it's an image, put an inline small version under the link.
    if (url ~ /\.(png|jpg|jpeg)/) {
        html = html "\n      <figure>"
        html = html "\n        <img src=\"" url "\" style=\"max-width: 400px\">"
        html = html "\n      </figure>"
    }

    return html
}

# First we change all &, <, > and " into &amp;, &lt;, &gt; and &quot;.
# This means the link (=>) and quote (>) markers appear below as "=&gt;" and
# "&gt;", but it is easier to check for that in two rules than to keep
# escaping in every rule.
{
    $0 = html_escape($0)
}

# A line starting with ``` toggles preformatted mode.
/^```/ {
    if (pre == 0) {
        # We must close the list!
        if (list == 1) {
            list = 0
            print "      </ul>"
        }
        pre = 1
        print "      <pre><code>"
    } else {
        pre = 0
        print "      </code></pre>"
    }
    next
}

# Inside a preformatted block lines are passed through as they are (they were
# HTML-escaped by the first rule).
(pre == 1) {
    print $0
    next
}

# List item: "* " at the very start of the line. The pattern is anchored so
# that ordinary text containing "* " is not mistaken for a list item.
/^\* / {
    if (list == 0) {
        list = 1
        print "       <ul>"
    }
    sub(/^\* [ \t]*/, "")
    print "         <li>" $0 "</li>"
    next
}

# Any other line ends an open list (no "next": the line is still processed).
(list == 1) {
    list = 0
    print "      </ul>"
}

# Horizontal rule: three or more dashes on a line of their own.
/^---[-]*[ \t]*$/ {
    print "      <hr/>"
    next
}

# Blank lines produce no output.
/^[ \t]*$/ {
    next
}

# Headings: "#", "##", "###" map to h1, h2, h3; deeper levels are treated
# as h3.
/^#/ {
    match($0, /^#+/)
    tag = "h" (RLENGTH > 3 ? 3 : RLENGTH)
    sub(/^#+[ \t]*/, "")
    print "      <" tag ">" $0 "</" tag ">"
    next
}

# Quote line: the leading ">" has been escaped to "&gt;" by the first rule.
/^&gt;/ {
    sub(/^&gt;[ \t]*/, "")
    print "      <blockquote>" $0 "</blockquote>"
    next
}

# Link line: the "=>" marker has been escaped to "=&gt;" by the first rule.
/^=&gt;/ {
    sub(/^=&gt;[ \t]*/, "")
    print link_html($0)
    next
}

# Everything else is a plain paragraph.
{
    print "      <p>" $0 "</p>"
}

# Runs once after the last input line: closes whatever block is still open
# and emits the fixed HTML page footer.
END{
    # Closes a preformatted block whose ``` fence was never terminated
    if(pre == 1){
        print "      </code></pre>"
    }
    # Closes open list
    if(list == 1){
        print "      </ul>"
    }
    print "    </div>"
    print "  </body>"
    print "</html>"
}
      

Index of Posts

Home