#!/bin/bash
# Downloads and converts an article to EPUB
# Verify that every external tool this script relies on is on PATH. All
# missing tools are reported (on stderr) before exiting, so the user can
# install everything in one pass instead of fixing one error per run.
ok="yes"

# require_tool NAME HINT...
#   If NAME is not an available command, prints
#   "Could not find NAME. Get it from:" followed by each HINT on its own
#   line to stderr and sets the global ok="no". Does nothing otherwise.
require_tool() {
  local tool=$1
  shift
  if command -v "$tool" >/dev/null; then
    return 0
  fi
  printf 'Could not find %s. Get it from:\n' "$tool" >&2
  printf '%s\n' "$@" >&2
  ok="no"
}

require_tool mercury-parser ' https://mercury.postlight.com/web-parser/'
require_tool jq ' https://stedolan.github.io/jq/'
require_tool pandoc ' https://pandoc.org/'
# hxcopy ships in the W3C "html-xml-utils" package (the previous hint
# misspelled the package name as "hxml-xml-utils").
require_tool hxcopy \
  ' brew install html-xml-utils' \
  ' or apt-get install html-xml-utils'

if [ "$ok" != "yes" ]; then
  exit 1
fi
# Main conversion: parse every URL given on the command line with
# mercury-parser, concatenate the extracted HTML, and have pandoc turn it
# into a single EPUB named "<author> - <title>.epub" in the current
# directory. Title, author and publication date are taken from the first
# URL only; the description is the first article's excerpt followed by
# every source URL, one per line.
if [ -z "$1" ]; then
  echo 'You must specify at least one URL.' >&2
  exit 1
fi

first_url=$1
shift

echo "Parsing $first_url" >&2
# The URL must be quoted: query strings routinely contain '?', '*' or '['
# which the shell would otherwise treat as glob patterns.
if ! json=$(mercury-parser "$first_url"); then
  echo "mercury-parser failed for $first_url" >&2
  exit 1
fi

# Feed the JSON to jq through a quoted here-string; an unquoted `echo $json`
# would collapse whitespace and glob-expand words inside the article text.
title=$(jq -r '.title // "Untitled"' <<<"$json")
author=$(jq -r '.author // "Unknown"' <<<"$json")
# `// ""` keeps a missing field from turning into the literal string "null".
pubdate=$(jq -r '.date_published // ""' <<<"$json")
description=$(jq -r '.excerpt // ""' <<<"$json")
description="${description}
${first_url}"

# hxcopy is given the article's URL as the old location (-i) and file:///
# as the new one (-o) so that it adjusts the relative links in the
# extracted HTML accordingly.
content=$(jq -r '.content // ""' <<<"$json" | hxcopy -i "$first_url" -o "file:///")

for url in "$@"; do
  echo "Parsing and appending $url" >&2
  # Appended articles get the same link rewriting as the first one.
  content="${content}$(mercury-parser "$url" \
    | jq -r '.content // ""' \
    | hxcopy -i "$url" -o "file:///")"
  description="${description}
${url}"
done

filename="${author} - ${title}.epub"
# A '/' in the title or author would be interpreted as a directory
# separator and make the output path invalid.
filename=${filename//\//-}

pandoc_args=(
  -f html
  -o "$filename"
  --metadata "title=${title}"
  --metadata "author=${author}"
  --metadata "description=${description}"
)
# Only pass a date when the parser actually found one.
if [ -n "$pubdate" ]; then
  pandoc_args+=(--metadata "date=${pubdate}")
fi

# printf with a quoted argument preserves the HTML byte-for-byte (apart
# from the trailing newline), unlike an unquoted `echo $content`.
pandoc "${pandoc_args[@]}" <(printf '%s\n' "$content") \
  && echo "Wrote ${filename}" >&2