roam/daily/2021-12-28.org

455 lines
12 KiB
Org Mode
Raw Normal View History

2022-01-10 15:56:56 +00:00
:PROPERTIES:
:ID: 9cf3c4cc-44a5-4ff4-bdf5-bc04b214a8e3
:END:
#+title: 2021-12-28
* Identifying and correcting missing recipient records :ATTACH:
#+begin_src python :results file :eval no-export
import dataclasses
import json
import requests
source = "data/9c/f3c4cc-44a5-4ff4-bdf5-bc04b214a8e3/unique-recipients.jsons"
destination = "data/9c/f3c4cc-44a5-4ff4-bdf5-bc04b214a8e3/subscribers.jsons"
with open(destination, 'w') as f_out:
with open(source, 'r') as f_in:
for line in f_in:
row = json.loads(line)
uuid = row['recipient']
mapping = requests.get(f'https://mapping.aweberprod.com/{uuid}').json()
subscriber = requests.get(f'http://subscriber.service.production.consul/subscriber/{mapping["value"]}').json()
print(json.dumps(subscriber), file=f_out)
return destination
#+end_src
#+RESULTS:
[[file:data/9c/f3c4cc-44a5-4ff4-bdf5-bc04b214a8e3/subscribers.jsons]]
#+begin_src python :exports both :eval no-export
import collections
import json
import requests
list_ids = collections.Counter()
with open("data/9c/f3c4cc-44a5-4ff4-bdf5-bc04b214a8e3/subscribers.jsons") as f:
for line in f:
if "SubscriberNotFound" in line:
continue
row = json.loads(line)
list_ids[row["list_id"]] += 1
lists = [
["List ID", "Account ID", "List name", "Friendly list name", "Subscribers"],
None,
]
for list_id, subscribers in list_ids.items():
list_details = requests.get(
f"http://list.service.production.consul/v1/lists/{list_id}"
).json()
lists.append(
[
list_details["list_id"],
list_details["account_id"],
list_details["list_name"],
list_details["friendly_list_name"],
subscribers,
]
)
return lists
#+end_src
#+RESULTS:
| List ID | Account ID | List name | Friendly list name | Subscribers |
|---------+------------+---------------+----------------------+-------------|
| 4385291 | 234390 | awlist4385291 | Cosmos Subscribers | 6 |
| 3491206 | 878912 | awlist3491206 | Just Creative | 1 |
| 6170662 | 979644 | awlist6170662 | 2,000 ar@nullsum.net | 1504 |
This is a breakdown of the existing subscribers that failed due to missing
recipient records. Most of the missing recipients belong to Andrew's account.
#+begin_src bash :results output :exports both :eval no-export
# Tally the email domains of the fetched subscribers on list 6170662.
subscribers=data/9c/f3c4cc-44a5-4ff4-bdf5-bc04b214a8e3/subscribers.jsons
grep '"list_id": 6170662' "$subscribers" \
    | jq -r .email \
    | cut -d '@' -f 2 \
    | sort \
    | uniq -c
#+end_src
#+RESULTS:
: 1504 nullsum.net
These seem related to an issue he had a while back: by using a lot of
subscribers with the same email address, made unique only by =+= identifiers,
he ran into an edge-case race condition with recipient record updates:
https://aweber.slack.com/archives/CF62W6D5G/p1637680253327200.
These are the 7 remaining affected subscribers:
#+begin_src bash :results output :wrap src json :exports both :eval no-export
# Pretty-print the fetched subscriber records that belong to lists 4385291
# and 3491206. grep -E is used because egrep is deprecated in GNU grep.
grep -E '"list_id": (4385291|3491206)' \
    data/9c/f3c4cc-44a5-4ff4-bdf5-bc04b214a8e3/subscribers.jsons \
    | jq -r .
#+end_src
#+RESULTS:
#+begin_src json
{
"isp": null,
"subscriber_id": 2565282848,
"legacy_name": " ",
"subscriber_source": "CEP",
"dma_code": "510",
"id": 2565282848,
"custom_fields": {
"sha1": null,
"subscription date": null,
"gclid": null,
"timestamp": null,
"receipt": null,
"system": null,
"birthday": " ",
"cbid": null
},
"subscriber_uuid": "1ead2f7c-69af-483b-88d1-6d532df52df1",
"city": "Cuyahoga Falls",
"verified": 0,
"self": "http://subscriber.service.production.consul/subscriber/2565282848",
"verification_time": null,
"subscribe_method": "api",
"stop_time": null,
"list_id": 4385291,
"latitude": 41.1482,
"email": "goog@neo.rr.com",
"status": "subscribed",
"last_followup": 1001,
"area_code": null,
"followuptime": null,
"org": null,
"postal": "44221",
"ip_address": "184.56.13.147",
"name": "Ron",
"lead_id": 70547273,
"add_url": null,
"country": "United States",
"region": "OH",
"unsubscribe_source": null,
"longitude": -81.4736,
"subscribed_at": "2018-05-20T12:40:36.263651-04:00",
"stop_method": null,
"unsubscribe_timestamp": null
}
{
"isp": null,
"subscriber_id": 2565282848,
"legacy_name": " ",
"subscriber_source": "CEP",
"dma_code": "510",
"id": 2565282848,
"custom_fields": {
"sha1": null,
"subscription date": null,
"gclid": null,
"timestamp": null,
"receipt": null,
"system": null,
"birthday": " ",
"cbid": null
},
"subscriber_uuid": "1ead2f7c-69af-483b-88d1-6d532df52df1",
"city": "Cuyahoga Falls",
"verified": 0,
"self": "http://subscriber.service.production.consul/subscriber/2565282848",
"verification_time": null,
"subscribe_method": "api",
"stop_time": null,
"list_id": 4385291,
"latitude": 41.1482,
"email": "goog@neo.rr.com",
"status": "subscribed",
"last_followup": 1001,
"area_code": null,
"followuptime": null,
"org": null,
"postal": "44221",
"ip_address": "184.56.13.147",
"name": "Ron",
"lead_id": 70547273,
"add_url": null,
"country": "United States",
"region": "OH",
"unsubscribe_source": null,
"longitude": -81.4736,
"subscribed_at": "2018-05-20T12:40:36.263651-04:00",
"stop_method": null,
"unsubscribe_timestamp": null
}
{
"isp": null,
"subscriber_id": 3024684746,
"legacy_name": " ",
"subscriber_source": "CEP",
"dma_code": null,
"id": 3024684746,
"custom_fields": {
"sha1": null,
"subscription date": null,
"gclid": "",
"timestamp": null,
"receipt": null,
"system": null,
"birthday": " ",
"cbid": null
},
"subscriber_uuid": "a3250da2-00e6-4811-9029-690ad617e38e",
"city": "Burlington",
"verified": 0,
"self": "http://subscriber.service.production.consul/subscriber/3024684746",
"verification_time": null,
"subscribe_method": "api",
"stop_time": null,
"list_id": 4385291,
"latitude": 43.4342,
"email": "homa.partovi1340@gmail.com",
"status": "subscribed",
"last_followup": 1001,
"area_code": null,
"followuptime": null,
"org": null,
"postal": "L7M",
"ip_address": "64.229.66.185",
"name": "Homa",
"lead_id": 74312900,
"add_url": null,
"country": "Canada",
"region": "ON",
"unsubscribe_source": null,
"longitude": -79.8701,
"subscribed_at": "2019-01-01T09:30:03.463783-05:00",
"stop_method": null,
"unsubscribe_timestamp": null
}
{
"isp": null,
"subscriber_id": 3024684746,
"legacy_name": " ",
"subscriber_source": "CEP",
"dma_code": null,
"id": 3024684746,
"custom_fields": {
"sha1": null,
"subscription date": null,
"gclid": "",
"timestamp": null,
"receipt": null,
"system": null,
"birthday": " ",
"cbid": null
},
"subscriber_uuid": "a3250da2-00e6-4811-9029-690ad617e38e",
"city": "Burlington",
"verified": 0,
"self": "http://subscriber.service.production.consul/subscriber/3024684746",
"verification_time": null,
"subscribe_method": "api",
"stop_time": null,
"list_id": 4385291,
"latitude": 43.4342,
"email": "homa.partovi1340@gmail.com",
"status": "subscribed",
"last_followup": 1001,
"area_code": null,
"followuptime": null,
"org": null,
"postal": "L7M",
"ip_address": "64.229.66.185",
"name": "Homa",
"lead_id": 74312900,
"add_url": null,
"country": "Canada",
"region": "ON",
"unsubscribe_source": null,
"longitude": -79.8701,
"subscribed_at": "2019-01-01T09:30:03.463783-05:00",
"stop_method": null,
"unsubscribe_timestamp": null
}
{
"isp": null,
"subscriber_id": 3572674791,
"legacy_name": null,
"subscriber_source": null,
"dma_code": null,
"id": 3572674791,
"custom_fields": {
"sha1": null,
"subscription date": null,
"gclid": null,
"timestamp": null,
"receipt": null,
"system": null,
"birthday": null,
"cbid": null
},
"subscriber_uuid": "f36b791d-db77-4bcb-8d12-579a52fcc10b",
"city": null,
"verified": 0,
"self": "http://subscriber.service.production.consul/subscriber/3572674791",
"verification_time": null,
"subscribe_method": "api",
"stop_time": null,
"list_id": 4385291,
"latitude": null,
"email": "reikiartlove@outlook.com",
"status": "subscribed",
"last_followup": 1001,
"area_code": null,
"followuptime": null,
"org": null,
"postal": null,
"ip_address": null,
"name": null,
"lead_id": 78370684,
"add_url": null,
"country": null,
"region": null,
"unsubscribe_source": null,
"longitude": null,
"subscribed_at": "2020-09-25T01:58:52.823722-04:00",
"stop_method": null,
"unsubscribe_timestamp": null
}
{
"isp": null,
"subscriber_id": 3572674791,
"legacy_name": null,
"subscriber_source": null,
"dma_code": null,
"id": 3572674791,
"custom_fields": {
"sha1": null,
"subscription date": null,
"gclid": null,
"timestamp": null,
"receipt": null,
"system": null,
"birthday": null,
"cbid": null
},
"subscriber_uuid": "f36b791d-db77-4bcb-8d12-579a52fcc10b",
"city": null,
"verified": 0,
"self": "http://subscriber.service.production.consul/subscriber/3572674791",
"verification_time": null,
"subscribe_method": "api",
"stop_time": null,
"list_id": 4385291,
"latitude": null,
"email": "reikiartlove@outlook.com",
"status": "subscribed",
"last_followup": 1001,
"area_code": null,
"followuptime": null,
"org": null,
"postal": null,
"ip_address": null,
"name": null,
"lead_id": 78370684,
"add_url": null,
"country": null,
"region": null,
"unsubscribe_source": null,
"longitude": null,
"subscribed_at": "2020-09-25T01:58:52.823722-04:00",
"stop_method": null,
"unsubscribe_timestamp": null
}
{
"isp": null,
"subscriber_id": 1018258237,
"legacy_name": "",
"subscriber_source": "just_creative_popup_",
"dma_code": "0",
"id": 1018258237,
"custom_fields": {},
"subscriber_uuid": "68afa958-7c60-42de-a87b-cb04c871a728",
"city": "Porto Alegre",
"verified": 1,
"self": "http://subscriber.service.production.consul/subscriber/1018258237",
"verification_time": "2015-05-06T09:44:08",
"subscribe_method": "webform",
"stop_time": null,
"list_id": 3491206,
"latitude": -30.0333,
"email": "aferreira385@gmail.com",
"status": "subscribed",
"last_followup": 7,
"area_code": "0",
"followuptime": "2018-10-15T23:04:12.062433-04:00",
"org": null,
"postal": null,
"ip_address": "177.6.1.131",
"name": "",
"lead_id": 55748845,
"add_url": "http://justcreative.com/",
"country": "Brazil",
"region": "23",
"unsubscribe_source": null,
"longitude": -51.2,
"subscribed_at": "2015-05-06T09:43:32-04:00",
"stop_method": null,
"unsubscribe_timestamp": null
}
#+end_src
#+begin_src python :results output :eval never
import io
import json
import fastavro
import requests
import pika

# Avro schema used to encode each subscriber.rebuild.v1 message body.
schema = fastavro.parse_schema(
    requests.get("http://schema.aweberprod.com/avro/subscriber.rebuild.v1.avsc").json()
)


def subscribers():
    """Yield each parsed record from subscribers.jsons.

    Lines mentioning SubscriberNotFound are skipped before parsing.
    """
    with open("data/9c/f3c4cc-44a5-4ff4-bdf5-bc04b214a8e3/subscribers.jsons", "r") as f:
        for line in f:
            if "SubscriberNotFound" in line:
                continue
            yield json.loads(line)


# NOTE(review): the broker credentials are embedded in this URL; consider
# sourcing them from outside the note before sharing or re-running it.
conn = pika.BlockingConnection(
    pika.URLParameters("amqp://admin:rabbitmq@rabbitmq.aweberprod.com:5672/%2F")
)
try:
    channel = conn.channel()
    # NOTE(review): the recorded results earlier in this note show the same
    # subscriber on more than one line, so a subscriber may be published more
    # than once - presumably harmless for a rebuild; confirm.
    for subscriber in subscribers():
        # Schemaless Avro datum carrying only the subscriber's UUID.
        stream = io.BytesIO()
        fastavro.schemaless_writer(
            stream, schema, {"subscriber": subscriber["subscriber_uuid"]}
        )
        body = stream.getvalue()
        channel.basic_publish(
            "rpc",
            "subscriber.rebuild",
            body,
            pika.BasicProperties(
                app_id="correl/1.0.0",
                content_type="application/vnd.apache.avro.datum",
                type="subscriber.rebuild.v1",
            ),
        )
finally:
    # Release the connection even when encoding or publishing fails; the
    # is_open guard keeps close() from masking the original error.
    if conn.is_open:
        conn.close()
#+end_src
#+RESULTS: