roam/daily/2022-03-29.org
2022-04-23 00:39:06 -04:00

227 lines
7.1 KiB
Org Mode

:PROPERTIES:
:ID: 266d19c2-5ecd-48af-a9d4-4b0a7d3d5696
:END:
#+title: 2022-03-29
* Get list of subscribers with non-normalized tags on active accounts
:PROPERTIES:
:header-args:sql: :engine postgresql :cmdline "-U postgres postgres" :dir /docker:postgres: :exports both :cache yes
:END:
Investigating the impact of the new [[id:d06d3ab4-c2d0-47c3-aae1-4395567fc3d2][Tag Normalization]] rules on existing
subscribers on active accounts.
** Gathering data
I imported a dump of the =subscriber_tags= table from [[id:dd113e53-6144-4cb2-a4aa-da3dc2e3e6ea][AppDB]] as well as the
=list.subscribers= table data for all active accounts (~SELECT s.* FROM
list.subscribers s JOIN accounts a ON (a.a_id = s.account_id) WHERE a.status_id
< 7~).
I then built a table of subscribers having tags that do not match our validation
rules.
#+begin_src sql :exports code :eval never
-- One row per (subscriber, tag) for every tag that normalize_tag() would change.
-- Each subscriber's tags array is expanded with a lateral unnest(), so a
-- subscriber with several offending tags appears several times.
CREATE TABLE invalid_tags AS
SELECT
    s.list_id,
    s.account_id,
    t.subscriber_id,
    tag
FROM subscribers AS s
INNER JOIN subscriber_tags AS t
    ON s.id = t.subscriber_id
CROSS JOIN LATERAL unnest(tags) AS tag
-- NOTE(review): a NULL from normalize_tag() makes this comparison NULL and
-- drops the row; confirm that is acceptable for the analysis.
WHERE tag != normalize_tag(tag);
#+end_src
** Active accounts
#+name: total-accounts
#+begin_src sql :post humanize-numbers-in-results(results=*this*)
-- Number of distinct accounts present in the imported subscribers data.
SELECT
    COUNT(DISTINCT account_id)
FROM subscribers;
#+end_src
#+RESULTS[5d46a920e8c34c7fd755c2d1fd82ba567f8341d9]: total-accounts
| count |
|---------|
| 103,357 |
** Subscribers on active accounts
#+name: total-subscribers
#+begin_src sql :post humanize-numbers-in-results(results=*this*)
-- Total imported subscribers; COUNT(id) counts rows whose id is not NULL.
SELECT
    COUNT(id)
FROM subscribers;
#+end_src
#+RESULTS[17e3b468a66f03b74bdddd77044359dd8e4e92e3]: total-subscribers
| count |
|-------------|
| 259,745,858 |
** Subscribers with invalid tags
#+name: subscribers-with-invalid-tags
#+begin_src sql :post humanize-numbers-in-results(results=*this*)
-- Distinct subscribers having at least one row in invalid_tags.
-- DISTINCT is required: invalid_tags holds one row per offending tag.
SELECT
    COUNT(DISTINCT subscriber_id)
FROM invalid_tags;
#+end_src
#+RESULTS[b1e6beccd5a3bd4e503473844cb12e6d03d2c21d]: subscribers-with-invalid-tags
| count |
|-----------|
| 1,331,220 |
#+RESULTS:
#+begin_src emacs-lisp
"259,745,858"
#+end_src
#+header: :var filename="2022-03-29-subscribers-with-invalid-tags.png"
#+header: :var total=total-subscribers[2,0]
#+header: :var affected=subscribers-with-invalid-tags[2,0]
#+begin_src python :exports results :results file
import matplotlib.pyplot as plt
plt.style.use("seaborn-pastel")
total = int(str(total).replace(',', ''))
affected = int(str(affected).replace(',', ''))
fig1, ax1 = plt.subplots()
ax1.pie(
[affected, total - affected],
explode=[0.5, 0],
labels=[f"Affected", "Unaffected"],
autopct="%1.1f%%",
)
plt.savefig(filename)
return filename
#+end_src
#+RESULTS:
[[file:2022-03-29-subscribers-with-invalid-tags.png]]
** Accounts with subscribers with invalid tags
#+name: accounts-with-invalid-tags
#+begin_src sql :post humanize-numbers-in-results(results=*this*)
-- Distinct accounts owning at least one subscriber row in invalid_tags.
SELECT
    COUNT(DISTINCT account_id)
FROM invalid_tags;
#+end_src
#+RESULTS[0ee163f090d0e32e6a45b5b859948882ffe021d0]: accounts-with-invalid-tags
| count |
|-------|
| 3,220 |
#+header: :var filename="2022-03-29-accounts-with-invalid-tags.png"
#+header: :var total=total-accounts[2,0]
#+header: :var affected=accounts-with-invalid-tags[2,0]
#+begin_src python :exports results :results file
import matplotlib.pyplot as plt
plt.style.use("seaborn-pastel")
total = int(str(total).replace(',', ''))
affected = int(str(affected).replace(',', ''))
fig1, ax1 = plt.subplots()
ax1.pie(
[affected, total - affected],
explode=[0.5, 0],
labels=[f"Affected", "Unaffected"],
autopct="%1.1f%%",
)
plt.savefig(filename)
return filename
#+end_src
#+RESULTS:
[[file:2022-03-29-accounts-with-invalid-tags.png]]
** Normalized tag breakdown
#+name: tag-breakdown
#+begin_src sql :post humanize-numbers-in-results(results=*this*)
-- Per-rule impact: for each normalization rule, the number of distinct
-- accounts and distinct subscribers having at least one tag that breaks it.
-- A tag can break several rules, so the rows overlap and do not sum to the
-- overall totals.
-- invalid_tags holds one row per (subscriber, tag), hence COUNT(DISTINCT ...)
-- for subscribers; a plain COUNT would count offending tags instead.
-- Every branch has its own "Rule" label, so UNION ALL suffices (no dedup).
SELECT 'Non-printable characters' AS "Rule"
, COUNT(DISTINCT account_id) AS "Accounts"
, COUNT(DISTINCT subscriber_id) AS "Subscribers"
FROM invalid_tags
WHERE tag ~ '[^[:print:]]'
UNION ALL SELECT 'Commas' AS "Rule"
, COUNT(DISTINCT account_id) AS "Accounts"
, COUNT(DISTINCT subscriber_id) AS "Subscribers"
FROM invalid_tags
WHERE tag ~ ','
UNION ALL SELECT 'ASCII quotation marks' AS "Rule"
, COUNT(DISTINCT account_id) AS "Accounts"
, COUNT(DISTINCT subscriber_id) AS "Subscribers"
FROM invalid_tags
-- Matches an apostrophe ('' is the escaped single quote) or a double quote.
WHERE tag ~ '[''""]'
UNION ALL SELECT 'Unicode quotation marks' AS "Rule"
, COUNT(DISTINCT account_id) AS "Accounts"
, COUNT(DISTINCT subscriber_id) AS "Subscribers"
FROM invalid_tags
WHERE tag ~ '[‘’“”]'
UNION ALL SELECT 'Leading or trailing whitespace' AS "Rule"
, COUNT(DISTINCT account_id) AS "Accounts"
, COUNT(DISTINCT subscriber_id) AS "Subscribers"
FROM invalid_tags
-- NOTE(review): TRIM() strips only space characters, not tabs or newlines;
-- confirm that matches the normalization rule.
WHERE TRIM(tag) != tag
UNION ALL SELECT 'Repeated whitespace' AS "Rule"
, COUNT(DISTINCT account_id) AS "Accounts"
, COUNT(DISTINCT subscriber_id) AS "Subscribers"
FROM invalid_tags
-- A POSIX class name is only valid inside a bracket expression: a bare
-- '[:space:]' would match the literal characters ':', 's', 'p', 'a', 'c', 'e'.
WHERE TRIM(tag) ~ '[[:space:]]{2,}'
UNION ALL SELECT 'Upper-case characters' AS "Rule"
, COUNT(DISTINCT account_id) AS "Accounts"
, COUNT(DISTINCT subscriber_id) AS "Subscribers"
FROM invalid_tags
WHERE LOWER(tag) != tag
#+end_src
#+RESULTS[853c04719cf6fd9c329359c1b72e8198393322b9]: tag-breakdown
| Rule | Accounts | Subscribers |
|--------------------------------+----------+-------------|
| Leading or trailing whitespace | 119 | 66,788 |
| Repeated whitespace | 2,404 | 1,234,651 |
| Unicode quotation marks | 126 | 21,343 |
| Commas | 378 | 54,567 |
| ASCII quotation marks | 2,507 | 1,544,607 |
| Upper-case characters | 0 | 0 |
| Non-printable characters | 58 | 1,749 |
#+header: :var filename="2022-03-29-invalid-tag-breakdown.png"
#+header: :var breakdown=tag-breakdown
#+begin_src python :exports results :results file
import matplotlib.pyplot as plt
plt.style.use("seaborn-pastel")
accounts = sorted(
[(int(str(row[1]).replace(",", "")), row[0]) for row in breakdown],
reverse=True,
)
account_total = sum(a[0] for a in accounts)
subscribers = sorted(
[(int(str(row[2]).replace(",", "")), row[0]) for row in breakdown],
reverse=True,
)
subscriber_total = sum(s[0] for s in subscribers)
fig1, axs = plt.subplots(2, 2)
axs[0, 0].set_title("Accounts")
wedges1, _ = axs[0, 0].pie(
[a[0] for a in accounts],
explode=[0.1 for _ in accounts],
)
axs[1, 0].axis("off")
axs[1, 0].legend(
wedges1,
["{:.1f}% {}".format(a[0] * 100.0 / account_total, a[1]) for a in accounts],
fontsize=8,
)
axs[0, 1].set_title("Subscribers")
wedges2, _ = axs[0, 1].pie(
[s[0] for s in subscribers],
explode=[0.1 for _ in subscribers],
)
axs[1, 1].axis("off")
axs[1, 1].legend(
wedges2,
["{:.1f}% {}".format(s[0] * 100.0 / subscriber_total, s[1]) for s in subscribers],
fontsize=8,
)
plt.savefig(filename)
return filename
#+end_src
#+RESULTS:
[[file:2022-03-29-invalid-tag-breakdown.png]]