:PROPERTIES:
:ID:       9cf3c4cc-44a5-4ff4-bdf5-bc04b214a8e3
:END:
#+title: 2021-12-28

* Identifying and correcting missing recipient records :ATTACH:

#+begin_src python :results file :eval no-export
import dataclasses
import json

import requests

source = "data/9c/f3c4cc-44a5-4ff4-bdf5-bc04b214a8e3/unique-recipients.jsons"
destination = "data/9c/f3c4cc-44a5-4ff4-bdf5-bc04b214a8e3/subscribers.jsons"
# Seconds to wait on each HTTP call; requests blocks indefinitely without one.
timeout = 30

# Each input line is a JSON document with a "recipient" UUID.  The UUID is
# resolved through the mapping service (its "value" field is used as the
# subscriber ID), and the subscriber service's JSON reply is written out as
# one line per recipient.  Replies are stored as-is, including error
# payloads, which is why the next block skips "SubscriberNotFound" lines.
#
# A single Session reuses connections across the per-line requests.  The
# source is opened before the destination so that a missing source file
# does not truncate an existing destination.
with requests.Session() as http:
    with open(source, "r") as f_in, open(destination, "w") as f_out:
        for line in f_in:
            if not line.strip():
                continue  # tolerate blank lines such as a trailing newline
            recipient_uuid = json.loads(line)["recipient"]
            mapping = http.get(
                f"https://mapping.aweberprod.com/{recipient_uuid}",
                timeout=timeout,
            ).json()
            subscriber_id = mapping["value"]
            subscriber = http.get(
                f"http://subscriber.service.production.consul/subscriber/{subscriber_id}",
                timeout=timeout,
            ).json()
            print(json.dumps(subscriber), file=f_out)
# org-babel wraps the block body in a function; the returned path becomes
# the file link under RESULTS.
return destination
#+end_src

#+RESULTS:
[[file:data/9c/f3c4cc-44a5-4ff4-bdf5-bc04b214a8e3/subscribers.jsons]]

#+begin_src python :exports both :eval no-export
import collections
import json

import requests

# Seconds to wait on each HTTP call; requests blocks indefinitely without one.
timeout = 30

# Count the stored subscribers per list, ignoring lookups that came back
# with a "SubscriberNotFound" payload.
list_ids = collections.Counter()
with open("data/9c/f3c4cc-44a5-4ff4-bdf5-bc04b214a8e3/subscribers.jsons") as f:
    for line in f:
        if "SubscriberNotFound" in line or not line.strip():
            continue
        list_ids[json.loads(line)["list_id"]] += 1

# Header row followed by None, which org-babel renders as a horizontal rule.
lists = [
    ["List ID", "Account ID", "List name", "Friendly list name", "Subscribers"],
    None,
]
# Look up each list's details and emit one table row per list, in the order
# the lists were first seen.
with requests.Session() as http:
    for list_id, subscribers in list_ids.items():
        list_details = http.get(
            f"http://list.service.production.consul/v1/lists/{list_id}",
            timeout=timeout,
        ).json()
        lists.append(
            [
                list_details["list_id"],
                list_details["account_id"],
                list_details["list_name"],
                list_details["friendly_list_name"],
                subscribers,
            ]
        )
# org-babel wraps the block body in a function; the returned rows become
# the table under RESULTS.
return lists
#+end_src

#+RESULTS:
| List ID | Account ID | List name     | Friendly list name   | Subscribers |
|---------+------------+---------------+----------------------+-------------|
| 4385291 |     234390 | awlist4385291 | Cosmos Subscribers   |           6 |
| 3491206 |     878912 | awlist3491206 | Just Creative        |           1 |
| 6170662 |     979644 | awlist6170662 | 2,000 ar@nullsum.net |        1504 |

This is a breakdown of the existing subscribers that
failed due to missing recipient records. Most of the missing recipients belong to Andrew's account.

#+begin_src bash :results output :exports both :eval no-export
# Count the email domains of the affected subscribers on list 6170662.
grep '"list_id": 6170662' data/9c/f3c4cc-44a5-4ff4-bdf5-bc04b214a8e3/subscribers.jsons \
    | jq -r .email \
    | cut -d '@' -f 2 \
    | sort | uniq -c
#+end_src

#+RESULTS:
: 1504 nullsum.net

These seem related to an issue he had a while back where, by using a lot of subscribers with the same email address but with =+= identifiers to uniqueify them, he ran into an edge race condition with recipient record updates: https://aweber.slack.com/archives/CF62W6D5G/p1637680253327200.

These are the 7 remaining affected subscribers:

#+begin_src bash :results output :wrap src json :exports both :eval no-export
# Pretty-print the affected subscribers from the two remaining lists.
# grep -E replaces the obsolescent egrep alias.
grep -E '"list_id": (4385291|3491206)' data/9c/f3c4cc-44a5-4ff4-bdf5-bc04b214a8e3/subscribers.jsons \
    | jq -r .
#+end_src

#+RESULTS:
#+begin_src json
{ "isp": null, "subscriber_id": 2565282848, "legacy_name": " ", "subscriber_source": "CEP", "dma_code": "510", "id": 2565282848, "custom_fields": { "sha1": null, "subscription date": null, "gclid": null, "timestamp": null, "receipt": null, "system": null, "birthday": " ", "cbid": null }, "subscriber_uuid": "1ead2f7c-69af-483b-88d1-6d532df52df1", "city": "Cuyahoga Falls", "verified": 0, "self": "http://subscriber.service.production.consul/subscriber/2565282848", "verification_time": null, "subscribe_method": "api", "stop_time": null, "list_id": 4385291, "latitude": 41.1482, "email": "goog@neo.rr.com", "status": "subscribed", "last_followup": 1001, "area_code": null, "followuptime": null, "org": null, "postal": "44221", "ip_address": "184.56.13.147", "name": "Ron", "lead_id": 70547273, "add_url": null, "country": "United States", "region": "OH", "unsubscribe_source": null, "longitude": -81.4736, "subscribed_at": "2018-05-20T12:40:36.263651-04:00", "stop_method": null, "unsubscribe_timestamp": null }
{ "isp": null, "subscriber_id": 2565282848, "legacy_name": " 
", "subscriber_source": "CEP", "dma_code": "510", "id": 2565282848, "custom_fields": { "sha1": null, "subscription date": null, "gclid": null, "timestamp": null, "receipt": null, "system": null, "birthday": " ", "cbid": null }, "subscriber_uuid": "1ead2f7c-69af-483b-88d1-6d532df52df1", "city": "Cuyahoga Falls", "verified": 0, "self": "http://subscriber.service.production.consul/subscriber/2565282848", "verification_time": null, "subscribe_method": "api", "stop_time": null, "list_id": 4385291, "latitude": 41.1482, "email": "goog@neo.rr.com", "status": "subscribed", "last_followup": 1001, "area_code": null, "followuptime": null, "org": null, "postal": "44221", "ip_address": "184.56.13.147", "name": "Ron", "lead_id": 70547273, "add_url": null, "country": "United States", "region": "OH", "unsubscribe_source": null, "longitude": -81.4736, "subscribed_at": "2018-05-20T12:40:36.263651-04:00", "stop_method": null, "unsubscribe_timestamp": null } { "isp": null, "subscriber_id": 3024684746, "legacy_name": " ", "subscriber_source": "CEP", "dma_code": null, "id": 3024684746, "custom_fields": { "sha1": null, "subscription date": null, "gclid": "", "timestamp": null, "receipt": null, "system": null, "birthday": " ", "cbid": null }, "subscriber_uuid": "a3250da2-00e6-4811-9029-690ad617e38e", "city": "Burlington", "verified": 0, "self": "http://subscriber.service.production.consul/subscriber/3024684746", "verification_time": null, "subscribe_method": "api", "stop_time": null, "list_id": 4385291, "latitude": 43.4342, "email": "homa.partovi1340@gmail.com", "status": "subscribed", "last_followup": 1001, "area_code": null, "followuptime": null, "org": null, "postal": "L7M", "ip_address": "64.229.66.185", "name": "Homa", "lead_id": 74312900, "add_url": null, "country": "Canada", "region": "ON", "unsubscribe_source": null, "longitude": -79.8701, "subscribed_at": "2019-01-01T09:30:03.463783-05:00", "stop_method": null, "unsubscribe_timestamp": null } { "isp": null, "subscriber_id": 
3024684746, "legacy_name": " ", "subscriber_source": "CEP", "dma_code": null, "id": 3024684746, "custom_fields": { "sha1": null, "subscription date": null, "gclid": "", "timestamp": null, "receipt": null, "system": null, "birthday": " ", "cbid": null }, "subscriber_uuid": "a3250da2-00e6-4811-9029-690ad617e38e", "city": "Burlington", "verified": 0, "self": "http://subscriber.service.production.consul/subscriber/3024684746", "verification_time": null, "subscribe_method": "api", "stop_time": null, "list_id": 4385291, "latitude": 43.4342, "email": "homa.partovi1340@gmail.com", "status": "subscribed", "last_followup": 1001, "area_code": null, "followuptime": null, "org": null, "postal": "L7M", "ip_address": "64.229.66.185", "name": "Homa", "lead_id": 74312900, "add_url": null, "country": "Canada", "region": "ON", "unsubscribe_source": null, "longitude": -79.8701, "subscribed_at": "2019-01-01T09:30:03.463783-05:00", "stop_method": null, "unsubscribe_timestamp": null } { "isp": null, "subscriber_id": 3572674791, "legacy_name": null, "subscriber_source": null, "dma_code": null, "id": 3572674791, "custom_fields": { "sha1": null, "subscription date": null, "gclid": null, "timestamp": null, "receipt": null, "system": null, "birthday": null, "cbid": null }, "subscriber_uuid": "f36b791d-db77-4bcb-8d12-579a52fcc10b", "city": null, "verified": 0, "self": "http://subscriber.service.production.consul/subscriber/3572674791", "verification_time": null, "subscribe_method": "api", "stop_time": null, "list_id": 4385291, "latitude": null, "email": "reikiartlove@outlook.com", "status": "subscribed", "last_followup": 1001, "area_code": null, "followuptime": null, "org": null, "postal": null, "ip_address": null, "name": null, "lead_id": 78370684, "add_url": null, "country": null, "region": null, "unsubscribe_source": null, "longitude": null, "subscribed_at": "2020-09-25T01:58:52.823722-04:00", "stop_method": null, "unsubscribe_timestamp": null } { "isp": null, "subscriber_id": 3572674791, 
"legacy_name": null, "subscriber_source": null, "dma_code": null, "id": 3572674791, "custom_fields": { "sha1": null, "subscription date": null, "gclid": null, "timestamp": null, "receipt": null, "system": null, "birthday": null, "cbid": null }, "subscriber_uuid": "f36b791d-db77-4bcb-8d12-579a52fcc10b", "city": null, "verified": 0, "self": "http://subscriber.service.production.consul/subscriber/3572674791", "verification_time": null, "subscribe_method": "api", "stop_time": null, "list_id": 4385291, "latitude": null, "email": "reikiartlove@outlook.com", "status": "subscribed", "last_followup": 1001, "area_code": null, "followuptime": null, "org": null, "postal": null, "ip_address": null, "name": null, "lead_id": 78370684, "add_url": null, "country": null, "region": null, "unsubscribe_source": null, "longitude": null, "subscribed_at": "2020-09-25T01:58:52.823722-04:00", "stop_method": null, "unsubscribe_timestamp": null } { "isp": null, "subscriber_id": 1018258237, "legacy_name": "", "subscriber_source": "just_creative_popup_", "dma_code": "0", "id": 1018258237, "custom_fields": {}, "subscriber_uuid": "68afa958-7c60-42de-a87b-cb04c871a728", "city": "Porto Alegre", "verified": 1, "self": "http://subscriber.service.production.consul/subscriber/1018258237", "verification_time": "2015-05-06T09:44:08", "subscribe_method": "webform", "stop_time": null, "list_id": 3491206, "latitude": -30.0333, "email": "aferreira385@gmail.com", "status": "subscribed", "last_followup": 7, "area_code": "0", "followuptime": "2018-10-15T23:04:12.062433-04:00", "org": null, "postal": null, "ip_address": "177.6.1.131", "name": "", "lead_id": 55748845, "add_url": "http://justcreative.com/", "country": "Brazil", "region": "23", "unsubscribe_source": null, "longitude": -51.2, "subscribed_at": "2015-05-06T09:43:32-04:00", "stop_method": null, "unsubscribe_timestamp": null } #+end_src #+begin_src python :results output :eval never import io import json import fastavro import requests import pika 
# Fetch and parse the Avro schema used to encode subscriber.rebuild.v1
# messages.  Fail fast on an HTTP error rather than trying to parse an error
# response as a schema, and never wait on the schema service indefinitely.
schema_response = requests.get(
    "http://schema.aweberprod.com/avro/subscriber.rebuild.v1.avsc",
    timeout=30,
)
schema_response.raise_for_status()
schema = fastavro.parse_schema(schema_response.json())


def subscribers():
    """Yield each subscriber record stored in subscribers.jsons.

    The file holds one JSON document per line.  Lines containing
    "SubscriberNotFound" and blank lines are skipped.
    """
    with open("data/9c/f3c4cc-44a5-4ff4-bdf5-bc04b214a8e3/subscribers.jsons", "r") as f:
        for line in f:
            if "SubscriberNotFound" in line or not line.strip():
                continue
            yield json.loads(line)


# NOTE(review): the broker credentials are embedded in this URL; consider
# supplying them from the environment instead of the notes file.
conn = pika.BlockingConnection(
    pika.URLParameters("amqp://admin:rabbitmq@rabbitmq.aweberprod.com:5672/%2F")
)
try:
    channel = conn.channel()
    # Publish one Avro-encoded rebuild request per subscriber record, to the
    # "rpc" exchange with routing key "subscriber.rebuild".
    for subscriber in subscribers():
        stream = io.BytesIO()
        fastavro.schemaless_writer(
            stream, schema, {"subscriber": subscriber["subscriber_uuid"]}
        )
        channel.basic_publish(
            "rpc",
            "subscriber.rebuild",
            stream.getvalue(),
            pika.BasicProperties(
                app_id="correl/1.0.0",
                content_type="application/vnd.apache.avro.datum",
                type="subscriber.rebuild.v1",
            ),
        )
finally:
    # Close the connection even when encoding or publishing fails part-way.
    conn.close()
#+end_src

#+RESULTS: