455 lines
12 KiB
Org Mode
455 lines
12 KiB
Org Mode
|
:PROPERTIES:
|
||
|
:ID: 9cf3c4cc-44a5-4ff4-bdf5-bc04b214a8e3
|
||
|
:END:
|
||
|
#+title: 2021-12-28
|
||
|
|
||
|
* Identifying and correcting missing recipient records :ATTACH:
|
||
|
#+begin_src python :results file :eval no-export
|
||
|
import dataclasses
|
||
|
import json
|
||
|
|
||
|
import requests
|
||
|
|
||
|
source = "data/9c/f3c4cc-44a5-4ff4-bdf5-bc04b214a8e3/unique-recipients.jsons"
|
||
|
destination = "data/9c/f3c4cc-44a5-4ff4-bdf5-bc04b214a8e3/subscribers.jsons"
|
||
|
|
||
|
with open(destination, 'w') as f_out:
|
||
|
with open(source, 'r') as f_in:
|
||
|
for line in f_in:
|
||
|
row = json.loads(line)
|
||
|
uuid = row['recipient']
|
||
|
mapping = requests.get(f'https://mapping.aweberprod.com/{uuid}').json()
|
||
|
subscriber = requests.get(f'http://subscriber.service.production.consul/subscriber/{mapping["value"]}').json()
|
||
|
print(json.dumps(subscriber), file=f_out)
|
||
|
|
||
|
return destination
|
||
|
#+end_src
|
||
|
|
||
|
#+RESULTS:
|
||
|
[[file:data/9c/f3c4cc-44a5-4ff4-bdf5-bc04b214a8e3/subscribers.jsons]]
|
||
|
|
||
|
#+begin_src python :exports both :eval no-export
|
||
|
import collections
|
||
|
import json
|
||
|
|
||
|
import requests
|
||
|
|
||
|
list_ids = collections.Counter()
|
||
|
with open("data/9c/f3c4cc-44a5-4ff4-bdf5-bc04b214a8e3/subscribers.jsons") as f:
|
||
|
for line in f:
|
||
|
if "SubscriberNotFound" in line:
|
||
|
continue
|
||
|
row = json.loads(line)
|
||
|
list_ids[row["list_id"]] += 1
|
||
|
|
||
|
lists = [
|
||
|
["List ID", "Account ID", "List name", "Friendly list name", "Subscribers"],
|
||
|
None,
|
||
|
]
|
||
|
for list_id, subscribers in list_ids.items():
|
||
|
list_details = requests.get(
|
||
|
f"http://list.service.production.consul/v1/lists/{list_id}"
|
||
|
).json()
|
||
|
lists.append(
|
||
|
[
|
||
|
list_details["list_id"],
|
||
|
list_details["account_id"],
|
||
|
list_details["list_name"],
|
||
|
list_details["friendly_list_name"],
|
||
|
subscribers,
|
||
|
]
|
||
|
)
|
||
|
|
||
|
return lists
|
||
|
#+end_src
|
||
|
|
||
|
#+RESULTS:
|
||
|
| List ID | Account ID | List name | Friendly list name | Subscribers |
|
||
|
|---------+------------+---------------+----------------------+-------------|
|
||
|
| 4385291 | 234390 | awlist4385291 | Cosmos Subscribers | 6 |
|
||
|
| 3491206 | 878912 | awlist3491206 | Just Creative | 1 |
|
||
|
| 6170662 | 979644 | awlist6170662 | 2,000 ar@nullsum.net | 1504 |
|
||
|
|
||
|
This is a breakdown of the existing subscribers that failed due to missing
|
||
|
recipient records. Most of the missing recipients belong to Andrew's account.
|
||
|
|
||
|
#+begin_src bash :results output :exports both :eval no-export
|
||
|
# Count the records on list 6170662 by email domain.
grep '"list_id": 6170662' data/9c/f3c4cc-44a5-4ff4-bdf5-bc04b214a8e3/subscribers.jsons \
    | jq -r .email \
    | cut -d '@' -f 2 \
    | sort \
    | uniq -c
|
||
|
#+end_src
|
||
|
|
||
|
#+RESULTS:
|
||
|
: 1504 nullsum.net
|
||
|
|
||
|
These seem related to an issue he had a while back where, by using a lot of
|
||
|
subscribers with the same email address but with =+= identifiers to distinguish
|
||
|
them, he ran into an edge-case race condition with recipient record updates:
|
||
|
https://aweber.slack.com/archives/CF62W6D5G/p1637680253327200.
|
||
|
|
||
|
These are the 7 remaining affected subscriber records (several subscribers appear twice):
|
||
|
|
||
|
#+begin_src bash :results output :wrap src json :exports both :eval no-export
|
||
|
# Pretty-print the records that belong to the two other affected lists.
# `grep -E` replaces the obsolescent `egrep` alias.
grep -E '"list_id": (4385291|3491206)' data/9c/f3c4cc-44a5-4ff4-bdf5-bc04b214a8e3/subscribers.jsons \
    | jq -r .
|
||
|
#+end_src
|
||
|
|
||
|
#+RESULTS:
|
||
|
#+begin_src json
|
||
|
{
|
||
|
"isp": null,
|
||
|
"subscriber_id": 2565282848,
|
||
|
"legacy_name": " ",
|
||
|
"subscriber_source": "CEP",
|
||
|
"dma_code": "510",
|
||
|
"id": 2565282848,
|
||
|
"custom_fields": {
|
||
|
"sha1": null,
|
||
|
"subscription date": null,
|
||
|
"gclid": null,
|
||
|
"timestamp": null,
|
||
|
"receipt": null,
|
||
|
"system": null,
|
||
|
"birthday": " ",
|
||
|
"cbid": null
|
||
|
},
|
||
|
"subscriber_uuid": "1ead2f7c-69af-483b-88d1-6d532df52df1",
|
||
|
"city": "Cuyahoga Falls",
|
||
|
"verified": 0,
|
||
|
"self": "http://subscriber.service.production.consul/subscriber/2565282848",
|
||
|
"verification_time": null,
|
||
|
"subscribe_method": "api",
|
||
|
"stop_time": null,
|
||
|
"list_id": 4385291,
|
||
|
"latitude": 41.1482,
|
||
|
"email": "goog@neo.rr.com",
|
||
|
"status": "subscribed",
|
||
|
"last_followup": 1001,
|
||
|
"area_code": null,
|
||
|
"followuptime": null,
|
||
|
"org": null,
|
||
|
"postal": "44221",
|
||
|
"ip_address": "184.56.13.147",
|
||
|
"name": "Ron",
|
||
|
"lead_id": 70547273,
|
||
|
"add_url": null,
|
||
|
"country": "United States",
|
||
|
"region": "OH",
|
||
|
"unsubscribe_source": null,
|
||
|
"longitude": -81.4736,
|
||
|
"subscribed_at": "2018-05-20T12:40:36.263651-04:00",
|
||
|
"stop_method": null,
|
||
|
"unsubscribe_timestamp": null
|
||
|
}
|
||
|
{
|
||
|
"isp": null,
|
||
|
"subscriber_id": 2565282848,
|
||
|
"legacy_name": " ",
|
||
|
"subscriber_source": "CEP",
|
||
|
"dma_code": "510",
|
||
|
"id": 2565282848,
|
||
|
"custom_fields": {
|
||
|
"sha1": null,
|
||
|
"subscription date": null,
|
||
|
"gclid": null,
|
||
|
"timestamp": null,
|
||
|
"receipt": null,
|
||
|
"system": null,
|
||
|
"birthday": " ",
|
||
|
"cbid": null
|
||
|
},
|
||
|
"subscriber_uuid": "1ead2f7c-69af-483b-88d1-6d532df52df1",
|
||
|
"city": "Cuyahoga Falls",
|
||
|
"verified": 0,
|
||
|
"self": "http://subscriber.service.production.consul/subscriber/2565282848",
|
||
|
"verification_time": null,
|
||
|
"subscribe_method": "api",
|
||
|
"stop_time": null,
|
||
|
"list_id": 4385291,
|
||
|
"latitude": 41.1482,
|
||
|
"email": "goog@neo.rr.com",
|
||
|
"status": "subscribed",
|
||
|
"last_followup": 1001,
|
||
|
"area_code": null,
|
||
|
"followuptime": null,
|
||
|
"org": null,
|
||
|
"postal": "44221",
|
||
|
"ip_address": "184.56.13.147",
|
||
|
"name": "Ron",
|
||
|
"lead_id": 70547273,
|
||
|
"add_url": null,
|
||
|
"country": "United States",
|
||
|
"region": "OH",
|
||
|
"unsubscribe_source": null,
|
||
|
"longitude": -81.4736,
|
||
|
"subscribed_at": "2018-05-20T12:40:36.263651-04:00",
|
||
|
"stop_method": null,
|
||
|
"unsubscribe_timestamp": null
|
||
|
}
|
||
|
{
|
||
|
"isp": null,
|
||
|
"subscriber_id": 3024684746,
|
||
|
"legacy_name": " ",
|
||
|
"subscriber_source": "CEP",
|
||
|
"dma_code": null,
|
||
|
"id": 3024684746,
|
||
|
"custom_fields": {
|
||
|
"sha1": null,
|
||
|
"subscription date": null,
|
||
|
"gclid": "",
|
||
|
"timestamp": null,
|
||
|
"receipt": null,
|
||
|
"system": null,
|
||
|
"birthday": " ",
|
||
|
"cbid": null
|
||
|
},
|
||
|
"subscriber_uuid": "a3250da2-00e6-4811-9029-690ad617e38e",
|
||
|
"city": "Burlington",
|
||
|
"verified": 0,
|
||
|
"self": "http://subscriber.service.production.consul/subscriber/3024684746",
|
||
|
"verification_time": null,
|
||
|
"subscribe_method": "api",
|
||
|
"stop_time": null,
|
||
|
"list_id": 4385291,
|
||
|
"latitude": 43.4342,
|
||
|
"email": "homa.partovi1340@gmail.com",
|
||
|
"status": "subscribed",
|
||
|
"last_followup": 1001,
|
||
|
"area_code": null,
|
||
|
"followuptime": null,
|
||
|
"org": null,
|
||
|
"postal": "L7M",
|
||
|
"ip_address": "64.229.66.185",
|
||
|
"name": "Homa",
|
||
|
"lead_id": 74312900,
|
||
|
"add_url": null,
|
||
|
"country": "Canada",
|
||
|
"region": "ON",
|
||
|
"unsubscribe_source": null,
|
||
|
"longitude": -79.8701,
|
||
|
"subscribed_at": "2019-01-01T09:30:03.463783-05:00",
|
||
|
"stop_method": null,
|
||
|
"unsubscribe_timestamp": null
|
||
|
}
|
||
|
{
|
||
|
"isp": null,
|
||
|
"subscriber_id": 3024684746,
|
||
|
"legacy_name": " ",
|
||
|
"subscriber_source": "CEP",
|
||
|
"dma_code": null,
|
||
|
"id": 3024684746,
|
||
|
"custom_fields": {
|
||
|
"sha1": null,
|
||
|
"subscription date": null,
|
||
|
"gclid": "",
|
||
|
"timestamp": null,
|
||
|
"receipt": null,
|
||
|
"system": null,
|
||
|
"birthday": " ",
|
||
|
"cbid": null
|
||
|
},
|
||
|
"subscriber_uuid": "a3250da2-00e6-4811-9029-690ad617e38e",
|
||
|
"city": "Burlington",
|
||
|
"verified": 0,
|
||
|
"self": "http://subscriber.service.production.consul/subscriber/3024684746",
|
||
|
"verification_time": null,
|
||
|
"subscribe_method": "api",
|
||
|
"stop_time": null,
|
||
|
"list_id": 4385291,
|
||
|
"latitude": 43.4342,
|
||
|
"email": "homa.partovi1340@gmail.com",
|
||
|
"status": "subscribed",
|
||
|
"last_followup": 1001,
|
||
|
"area_code": null,
|
||
|
"followuptime": null,
|
||
|
"org": null,
|
||
|
"postal": "L7M",
|
||
|
"ip_address": "64.229.66.185",
|
||
|
"name": "Homa",
|
||
|
"lead_id": 74312900,
|
||
|
"add_url": null,
|
||
|
"country": "Canada",
|
||
|
"region": "ON",
|
||
|
"unsubscribe_source": null,
|
||
|
"longitude": -79.8701,
|
||
|
"subscribed_at": "2019-01-01T09:30:03.463783-05:00",
|
||
|
"stop_method": null,
|
||
|
"unsubscribe_timestamp": null
|
||
|
}
|
||
|
{
|
||
|
"isp": null,
|
||
|
"subscriber_id": 3572674791,
|
||
|
"legacy_name": null,
|
||
|
"subscriber_source": null,
|
||
|
"dma_code": null,
|
||
|
"id": 3572674791,
|
||
|
"custom_fields": {
|
||
|
"sha1": null,
|
||
|
"subscription date": null,
|
||
|
"gclid": null,
|
||
|
"timestamp": null,
|
||
|
"receipt": null,
|
||
|
"system": null,
|
||
|
"birthday": null,
|
||
|
"cbid": null
|
||
|
},
|
||
|
"subscriber_uuid": "f36b791d-db77-4bcb-8d12-579a52fcc10b",
|
||
|
"city": null,
|
||
|
"verified": 0,
|
||
|
"self": "http://subscriber.service.production.consul/subscriber/3572674791",
|
||
|
"verification_time": null,
|
||
|
"subscribe_method": "api",
|
||
|
"stop_time": null,
|
||
|
"list_id": 4385291,
|
||
|
"latitude": null,
|
||
|
"email": "reikiartlove@outlook.com",
|
||
|
"status": "subscribed",
|
||
|
"last_followup": 1001,
|
||
|
"area_code": null,
|
||
|
"followuptime": null,
|
||
|
"org": null,
|
||
|
"postal": null,
|
||
|
"ip_address": null,
|
||
|
"name": null,
|
||
|
"lead_id": 78370684,
|
||
|
"add_url": null,
|
||
|
"country": null,
|
||
|
"region": null,
|
||
|
"unsubscribe_source": null,
|
||
|
"longitude": null,
|
||
|
"subscribed_at": "2020-09-25T01:58:52.823722-04:00",
|
||
|
"stop_method": null,
|
||
|
"unsubscribe_timestamp": null
|
||
|
}
|
||
|
{
|
||
|
"isp": null,
|
||
|
"subscriber_id": 3572674791,
|
||
|
"legacy_name": null,
|
||
|
"subscriber_source": null,
|
||
|
"dma_code": null,
|
||
|
"id": 3572674791,
|
||
|
"custom_fields": {
|
||
|
"sha1": null,
|
||
|
"subscription date": null,
|
||
|
"gclid": null,
|
||
|
"timestamp": null,
|
||
|
"receipt": null,
|
||
|
"system": null,
|
||
|
"birthday": null,
|
||
|
"cbid": null
|
||
|
},
|
||
|
"subscriber_uuid": "f36b791d-db77-4bcb-8d12-579a52fcc10b",
|
||
|
"city": null,
|
||
|
"verified": 0,
|
||
|
"self": "http://subscriber.service.production.consul/subscriber/3572674791",
|
||
|
"verification_time": null,
|
||
|
"subscribe_method": "api",
|
||
|
"stop_time": null,
|
||
|
"list_id": 4385291,
|
||
|
"latitude": null,
|
||
|
"email": "reikiartlove@outlook.com",
|
||
|
"status": "subscribed",
|
||
|
"last_followup": 1001,
|
||
|
"area_code": null,
|
||
|
"followuptime": null,
|
||
|
"org": null,
|
||
|
"postal": null,
|
||
|
"ip_address": null,
|
||
|
"name": null,
|
||
|
"lead_id": 78370684,
|
||
|
"add_url": null,
|
||
|
"country": null,
|
||
|
"region": null,
|
||
|
"unsubscribe_source": null,
|
||
|
"longitude": null,
|
||
|
"subscribed_at": "2020-09-25T01:58:52.823722-04:00",
|
||
|
"stop_method": null,
|
||
|
"unsubscribe_timestamp": null
|
||
|
}
|
||
|
{
|
||
|
"isp": null,
|
||
|
"subscriber_id": 1018258237,
|
||
|
"legacy_name": "",
|
||
|
"subscriber_source": "just_creative_popup_",
|
||
|
"dma_code": "0",
|
||
|
"id": 1018258237,
|
||
|
"custom_fields": {},
|
||
|
"subscriber_uuid": "68afa958-7c60-42de-a87b-cb04c871a728",
|
||
|
"city": "Porto Alegre",
|
||
|
"verified": 1,
|
||
|
"self": "http://subscriber.service.production.consul/subscriber/1018258237",
|
||
|
"verification_time": "2015-05-06T09:44:08",
|
||
|
"subscribe_method": "webform",
|
||
|
"stop_time": null,
|
||
|
"list_id": 3491206,
|
||
|
"latitude": -30.0333,
|
||
|
"email": "aferreira385@gmail.com",
|
||
|
"status": "subscribed",
|
||
|
"last_followup": 7,
|
||
|
"area_code": "0",
|
||
|
"followuptime": "2018-10-15T23:04:12.062433-04:00",
|
||
|
"org": null,
|
||
|
"postal": null,
|
||
|
"ip_address": "177.6.1.131",
|
||
|
"name": "",
|
||
|
"lead_id": 55748845,
|
||
|
"add_url": "http://justcreative.com/",
|
||
|
"country": "Brazil",
|
||
|
"region": "23",
|
||
|
"unsubscribe_source": null,
|
||
|
"longitude": -51.2,
|
||
|
"subscribed_at": "2015-05-06T09:43:32-04:00",
|
||
|
"stop_method": null,
|
||
|
"unsubscribe_timestamp": null
|
||
|
}
|
||
|
#+end_src
|
||
|
|
||
|
#+begin_src python :results output :eval never
|
||
|
import io
|
||
|
import json
|
||
|
|
||
|
import fastavro
|
||
|
import requests
|
||
|
import pika
|
||
|
|
||
|
# Fetch the Avro schema for the rebuild message and parse it once up front.
# The timeout (seconds) keeps an unresponsive schema host from hanging the
# run, and the status check stops an error payload from reaching parse_schema.
schema_response = requests.get(
    "http://schema.aweberprod.com/avro/subscriber.rebuild.v1.avsc",
    timeout=30,
)
schema_response.raise_for_status()
schema = fastavro.parse_schema(schema_response.json())
|
||
|
|
||
|
|
||
|
def subscribers():
    """Yield each subscriber record stored in the fetched JSON-lines file.

    Blank lines and lines that contain the "SubscriberNotFound" marker are
    skipped; every other line is parsed as one JSON document.

    Yields:
        The decoded JSON value of each remaining line.

    Raises:
        OSError: if the file cannot be opened.
        json.JSONDecodeError: if a non-skipped line is not valid JSON.
    """
    path = "data/9c/f3c4cc-44a5-4ff4-bdf5-bc04b214a8e3/subscribers.jsons"
    # Decode explicitly as UTF-8 so the result does not depend on the locale.
    with open(path, "r", encoding="utf-8") as f:
        for line in f:
            if not line.strip():
                # Tolerate blank lines (e.g. a trailing newline).
                continue
            if "SubscriberNotFound" in line:
                continue
            yield json.loads(line)
|
||
|
|
||
|
|
||
|
# Publish one "subscriber.rebuild" message per affected subscriber.
#
# NOTE(review): broker credentials are embedded in this URL; consider loading
# them from the environment instead of keeping them in the notes.
conn = pika.BlockingConnection(
    pika.URLParameters("amqp://admin:rabbitmq@rabbitmq.aweberprod.com:5672/%2F")
)
try:
    channel = conn.channel()
    # The input can list the same subscriber more than once (the query results
    # in this note show repeated records). The message carries only the UUID,
    # so a repeat would be an identical rebuild request.
    requested = set()
    for subscriber in subscribers():
        subscriber_uuid = subscriber["subscriber_uuid"]
        if subscriber_uuid in requested:
            continue
        requested.add(subscriber_uuid)

        # Encode the message body as a schemaless Avro datum.
        stream = io.BytesIO()
        fastavro.schemaless_writer(stream, schema, {"subscriber": subscriber_uuid})

        channel.basic_publish(
            exchange="rpc",
            routing_key="subscriber.rebuild",
            body=stream.getvalue(),
            properties=pika.BasicProperties(
                app_id="correl/1.0.0",
                content_type="application/vnd.apache.avro.datum",
                type="subscriber.rebuild.v1",
            ),
        )
finally:
    # Close the connection even if encoding or publishing fails part-way.
    conn.close()
|
||
|
#+end_src
|
||
|
|
||
|
#+RESULTS:
|