#!/bin/bash
# Downloads one or more articles and converts them into a single EPUB.
#
# Usage: article URL [URL...]
#
# The first URL supplies the book metadata (title, author, publication date
# and excerpt). The content of every further URL is appended to the first
# article's content, and each URL is listed in the EPUB description. The
# result is written to "<author> - <title>.epub" in the current directory.
#
# Requires mercury-parser, jq and pandoc on PATH.
#
# Provenance: extracted from patch 34268699a4d0075ac1f05e7449163279925bfd4f
# (Correl, Thu, 13 Aug 2020 21:37:53 -0400),
# "[provisioning] Add article to epub download script", file bin/article.

# Prints a message to stderr and aborts the script with status 1.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Checks that every required external tool is available.
# Reports all missing tools (not just the first) before returning.
# Returns: 0 when everything is installed, 1 otherwise.
check_dependencies() {
  local ok="yes"

  if ! command -v mercury-parser >/dev/null; then
    echo 'Could not find mercury-parser. Get it from:' >&2
    echo ' https://mercury.postlight.com/web-parser/' >&2
    ok="no"
  fi

  if ! command -v jq >/dev/null; then
    echo 'Could not find jq. Get it from:' >&2
    echo ' https://stedolan.github.io/jq/' >&2
    ok="no"
  fi

  if ! command -v pandoc >/dev/null; then
    echo 'Could not find pandoc. Get it from:' >&2
    echo ' https://pandoc.org/' >&2
    ok="no"
  fi

  [[ "$ok" == "yes" ]]
}

# Runs mercury-parser on a URL and prints its JSON output to stdout.
# Arguments: $1 - URL of the article
# Returns:   non-zero when mercury-parser fails or its output is not a
#            JSON object (for example, empty output).
fetch_article() {
  local url=$1
  local json

  # The URL is quoted so characters such as '?' or '*' are never
  # glob-expanded against files in the working directory.
  json=$(mercury-parser "$url") || return 1

  # jq -e exits non-zero on empty input, invalid JSON, or a false result.
  jq -e 'type == "object"' <<<"$json" >/dev/null 2>&1 || return 1

  printf '%s\n' "$json"
}

# Entry point: parses the given URLs and writes the EPUB.
# Arguments: $@ - one or more article URLs
main() {
  check_dependencies || exit 1

  if [[ -z "$1" ]]; then
    echo 'You must specify at least one URL.' >&2
    exit 1
  fi

  local json title author pubdate description content filename url extra

  echo "Parsing $1" >&2
  json=$(fetch_article "$1") || die "Failed to parse $1"

  # Here-strings pass the JSON verbatim; an unquoted echo would word-split
  # it and expand glob characters found in the article text.
  title=$(jq -r '.title // "Untitled"' <<<"$json")
  author=$(jq -r '.author // "Unknown"' <<<"$json")
  # Missing fields become empty strings instead of the literal text "null".
  pubdate=$(jq -r '.date_published // ""' <<<"$json")
  description=$(jq -r '.excerpt // ""' <<<"$json")
  description="${description}"$'\n\n'"${1}"
  content=$(jq -r '.content // ""' <<<"$json")
  shift

  for url in "$@"; do
    echo "Parsing and appending $url" >&2
    extra=$(fetch_article "$url") || die "Failed to parse $url"
    content="${content}$(jq -r '.content // ""' <<<"$extra")"
    description="${description}"$'\n'"${url}"
  done

  filename="${author} - ${title}.epub"
  # A '/' in the author or title would otherwise be treated as a directory
  # separator and make pandoc write into a directory that does not exist.
  filename=${filename//\//-}

  local -a pandoc_args=(
    -f html
    -o "$filename"
    --metadata "title=${title}"
    --metadata "author=${author}"
    --metadata "description=${description}"
  )
  # Only set a date when the parser actually reported one.
  if [[ -n "$pubdate" ]]; then
    pandoc_args+=(--metadata "date=${pubdate}")
  fi

  # printf with a quoted expansion keeps the HTML whitespace intact
  # (significant inside <pre> blocks).
  pandoc "${pandoc_args[@]}" <(printf '%s\n' "$content") || return
  echo "Wrote ${filename}" >&2
}

main "$@"