#!/bin/bash
# Downloads and converts an article to EPUB
#
# Usage: <this-script> URL [URL...]
#
# The first URL supplies the EPUB metadata (title, author, publication date,
# excerpt) and the leading content. The content of every further URL is
# appended to the same book, and each URL is listed in the description.
#
# Requires: mercury-parser, jq, pandoc, hxcopy.
# Output:   "<author> - <title>.epub" in the current directory.
# Exit:     1 if a tool is missing, no URL is given, or the first URL cannot
#           be parsed; otherwise the status of the final pandoc/echo chain.

# Report a missing tool on stderr.
# Arguments: $1 - command name; remaining args - hint lines printed verbatim.
# Returns:   0 if the command exists, 1 otherwise.
require_tool() {
  local tool=$1
  shift
  if command -v "$tool" >/dev/null; then
    return 0
  fi
  echo "Could not find ${tool}. Get it from:" >&2
  printf '%s\n' "$@" >&2
  return 1
}

# Check every dependency before bailing out so the user sees all of the
# missing tools in a single run.
ok="yes"
require_tool mercury-parser ' https://mercury.postlight.com/web-parser/' || ok="no"
require_tool jq ' https://stedolan.github.io/jq/' || ok="no"
require_tool pandoc ' https://pandoc.org/' || ok="no"
require_tool hxcopy \
  ' brew install hxml-xml-utils' \
  ' or apt-get install hxml-xml-utils' || ok="no"

if [[ "$ok" != "yes" ]]; then
  exit 1
fi

if [[ -z "${1:-}" ]]; then
  echo 'You must specify at least one URL.' >&2
  exit 1
fi

echo "Parsing $1" >&2
# Quote the URL: characters such as ? * [ would otherwise be treated as a glob.
if ! json=$(mercury-parser "$1"); then
  echo "Failed to parse $1" >&2
  exit 1
fi

# Feed the JSON to jq via a quoted here-string; an unquoted `echo $json`
# would word-split and glob-expand the article text.
# `// ""` keeps jq from printing the literal string "null" for absent fields.
title=$(jq -r '.title // "Untitled"' <<<"$json")
author=$(jq -r '.author // "Unknown"' <<<"$json")
pubdate=$(jq -r '.date_published // ""' <<<"$json")
description=$(jq -r '.excerpt // ""' <<<"$json")
# The description is the excerpt, a blank line, then one source URL per line.
description+=$'\n\n'"$1"

# hxcopy is told the document moves from the article URL to file:///;
# presumably this rewrites relative links so they still resolve in the EPUB.
content=$(jq -r '.content // ""' <<<"$json" | hxcopy -i "$1" -o "file:///")
shift

for url in "$@"; do
  echo "Parsing and appending $url" >&2
  # Best effort: a URL that fails to parse is skipped, not fatal.
  if ! extra_json=$(mercury-parser "$url"); then
    echo "Failed to parse $url; skipping" >&2
    continue
  fi
  # NOTE(review): appended articles are not passed through hxcopy, so any
  # relative links in them are left untouched.
  content+=$(jq -r '.content // ""' <<<"$extra_json")
  description+=$'\n'"$url"
done

filename="${author} - ${title}.epub"
# A "/" in the author or title would point into a directory that does not
# exist; replace it so the EPUB is always written to the current directory.
filename=${filename//\//-}

pandoc_args=(
  -f html
  -o "$filename"
  --metadata "title=${title}"
  --metadata "author=${author}"
  --metadata "description=${description}"
)
# Only pass a date when the parser actually reported one.
if [[ -n "$pubdate" ]]; then
  pandoc_args+=(--metadata "date=${pubdate}")
fi

# printf with a quoted argument preserves the HTML exactly (whitespace inside
# <pre>, literal "*" characters) where an unquoted echo would not.
pandoc "${pandoc_args[@]}" <(printf '%s\n' "$content") \
  && echo "Wrote ${filename}" >&2