roam/daily/2021-12-28.org
2022-01-10 10:57:04 -05:00

12 KiB

2021-12-28

Identifying and correcting missing recipient records   ATTACH

  import dataclasses
  import json

  import requests

  source = "data/9c/f3c4cc-44a5-4ff4-bdf5-bc04b214a8e3/unique-recipients.jsons"
  destination = "data/9c/f3c4cc-44a5-4ff4-bdf5-bc04b214a8e3/subscribers.jsons"

  with open(destination, 'w') as f_out:
      with open(source, 'r') as f_in:
          for line in f_in:
              row = json.loads(line)
              uuid = row['recipient']
              mapping = requests.get(f'https://mapping.aweberprod.com/{uuid}').json()
              subscriber = requests.get(f'http://subscriber.service.production.consul/subscriber/{mapping["value"]}').json()
              print(json.dumps(subscriber), file=f_out)

  return destination

/correlr/roam/src/commit/e8ab186b48269ca9a582136e8b871a450c8f1fb7/daily/data/9c/f3c4cc-44a5-4ff4-bdf5-bc04b214a8e3/subscribers.jsons

  import collections
  import json

  import requests

  list_ids = collections.Counter()
  with open("data/9c/f3c4cc-44a5-4ff4-bdf5-bc04b214a8e3/subscribers.jsons") as f:
      for line in f:
          if "SubscriberNotFound" in line:
              continue
          row = json.loads(line)
          list_ids[row["list_id"]] += 1

  lists = [
      ["List ID", "Account ID", "List name", "Friendly list name", "Subscribers"],
      None,
  ]
  for list_id, subscribers in list_ids.items():
      list_details = requests.get(
          f"http://list.service.production.consul/v1/lists/{list_id}"
      ).json()
      lists.append(
          [
              list_details["list_id"],
              list_details["account_id"],
              list_details["list_name"],
              list_details["friendly_list_name"],
              subscribers,
          ]
      )

  return lists
List ID Account ID List name Friendly list name Subscribers
4385291 234390 awlist4385291 Cosmos Subscribers 6
3491206 878912 awlist3491206 Just Creative 1
6170662 979644 awlist6170662 2,000 ar@nullsum.net 1504

This is a breakdown of the existing subscribers that failed due to missing recipient records. Most of the missing recipients belong to Andrew's account.

  # Count the email domains of the fetched subscribers on list 6170662.
  grep '"list_id": 6170662' data/9c/f3c4cc-44a5-4ff4-bdf5-bc04b214a8e3/subscribers.jsons \
      | jq -r '.email' \
      | cut -d@ -f2 \
      | sort \
      | uniq -c
   1504 nullsum.net

These seem related to an issue he had a while back: by creating a large number of subscribers that share the same email address, made unique only by + identifiers, he ran into an edge-case race condition with recipient record updates: https://aweber.slack.com/archives/CF62W6D5G/p1637680253327200.

These are the 7 remaining affected subscriber records (4 distinct subscribers; three of them appear twice):

  # Pretty-print the fetched subscriber records for lists 4385291 and 3491206.
  # Uses `grep -E` because `egrep` is obsolescent, and lets grep read the file
  # directly instead of piping it through `cat`.
  grep -E '"list_id": (4385291|3491206)' data/9c/f3c4cc-44a5-4ff4-bdf5-bc04b214a8e3/subscribers.jsons \
      | jq -r .
{
  "isp": null,
  "subscriber_id": 2565282848,
  "legacy_name": " ",
  "subscriber_source": "CEP",
  "dma_code": "510",
  "id": 2565282848,
  "custom_fields": {
    "sha1": null,
    "subscription date": null,
    "gclid": null,
    "timestamp": null,
    "receipt": null,
    "system": null,
    "birthday": " ",
    "cbid": null
  },
  "subscriber_uuid": "1ead2f7c-69af-483b-88d1-6d532df52df1",
  "city": "Cuyahoga Falls",
  "verified": 0,
  "self": "http://subscriber.service.production.consul/subscriber/2565282848",
  "verification_time": null,
  "subscribe_method": "api",
  "stop_time": null,
  "list_id": 4385291,
  "latitude": 41.1482,
  "email": "goog@neo.rr.com",
  "status": "subscribed",
  "last_followup": 1001,
  "area_code": null,
  "followuptime": null,
  "org": null,
  "postal": "44221",
  "ip_address": "184.56.13.147",
  "name": "Ron",
  "lead_id": 70547273,
  "add_url": null,
  "country": "United States",
  "region": "OH",
  "unsubscribe_source": null,
  "longitude": -81.4736,
  "subscribed_at": "2018-05-20T12:40:36.263651-04:00",
  "stop_method": null,
  "unsubscribe_timestamp": null
}
{
  "isp": null,
  "subscriber_id": 2565282848,
  "legacy_name": " ",
  "subscriber_source": "CEP",
  "dma_code": "510",
  "id": 2565282848,
  "custom_fields": {
    "sha1": null,
    "subscription date": null,
    "gclid": null,
    "timestamp": null,
    "receipt": null,
    "system": null,
    "birthday": " ",
    "cbid": null
  },
  "subscriber_uuid": "1ead2f7c-69af-483b-88d1-6d532df52df1",
  "city": "Cuyahoga Falls",
  "verified": 0,
  "self": "http://subscriber.service.production.consul/subscriber/2565282848",
  "verification_time": null,
  "subscribe_method": "api",
  "stop_time": null,
  "list_id": 4385291,
  "latitude": 41.1482,
  "email": "goog@neo.rr.com",
  "status": "subscribed",
  "last_followup": 1001,
  "area_code": null,
  "followuptime": null,
  "org": null,
  "postal": "44221",
  "ip_address": "184.56.13.147",
  "name": "Ron",
  "lead_id": 70547273,
  "add_url": null,
  "country": "United States",
  "region": "OH",
  "unsubscribe_source": null,
  "longitude": -81.4736,
  "subscribed_at": "2018-05-20T12:40:36.263651-04:00",
  "stop_method": null,
  "unsubscribe_timestamp": null
}
{
  "isp": null,
  "subscriber_id": 3024684746,
  "legacy_name": " ",
  "subscriber_source": "CEP",
  "dma_code": null,
  "id": 3024684746,
  "custom_fields": {
    "sha1": null,
    "subscription date": null,
    "gclid": "",
    "timestamp": null,
    "receipt": null,
    "system": null,
    "birthday": " ",
    "cbid": null
  },
  "subscriber_uuid": "a3250da2-00e6-4811-9029-690ad617e38e",
  "city": "Burlington",
  "verified": 0,
  "self": "http://subscriber.service.production.consul/subscriber/3024684746",
  "verification_time": null,
  "subscribe_method": "api",
  "stop_time": null,
  "list_id": 4385291,
  "latitude": 43.4342,
  "email": "homa.partovi1340@gmail.com",
  "status": "subscribed",
  "last_followup": 1001,
  "area_code": null,
  "followuptime": null,
  "org": null,
  "postal": "L7M",
  "ip_address": "64.229.66.185",
  "name": "Homa",
  "lead_id": 74312900,
  "add_url": null,
  "country": "Canada",
  "region": "ON",
  "unsubscribe_source": null,
  "longitude": -79.8701,
  "subscribed_at": "2019-01-01T09:30:03.463783-05:00",
  "stop_method": null,
  "unsubscribe_timestamp": null
}
{
  "isp": null,
  "subscriber_id": 3024684746,
  "legacy_name": " ",
  "subscriber_source": "CEP",
  "dma_code": null,
  "id": 3024684746,
  "custom_fields": {
    "sha1": null,
    "subscription date": null,
    "gclid": "",
    "timestamp": null,
    "receipt": null,
    "system": null,
    "birthday": " ",
    "cbid": null
  },
  "subscriber_uuid": "a3250da2-00e6-4811-9029-690ad617e38e",
  "city": "Burlington",
  "verified": 0,
  "self": "http://subscriber.service.production.consul/subscriber/3024684746",
  "verification_time": null,
  "subscribe_method": "api",
  "stop_time": null,
  "list_id": 4385291,
  "latitude": 43.4342,
  "email": "homa.partovi1340@gmail.com",
  "status": "subscribed",
  "last_followup": 1001,
  "area_code": null,
  "followuptime": null,
  "org": null,
  "postal": "L7M",
  "ip_address": "64.229.66.185",
  "name": "Homa",
  "lead_id": 74312900,
  "add_url": null,
  "country": "Canada",
  "region": "ON",
  "unsubscribe_source": null,
  "longitude": -79.8701,
  "subscribed_at": "2019-01-01T09:30:03.463783-05:00",
  "stop_method": null,
  "unsubscribe_timestamp": null
}
{
  "isp": null,
  "subscriber_id": 3572674791,
  "legacy_name": null,
  "subscriber_source": null,
  "dma_code": null,
  "id": 3572674791,
  "custom_fields": {
    "sha1": null,
    "subscription date": null,
    "gclid": null,
    "timestamp": null,
    "receipt": null,
    "system": null,
    "birthday": null,
    "cbid": null
  },
  "subscriber_uuid": "f36b791d-db77-4bcb-8d12-579a52fcc10b",
  "city": null,
  "verified": 0,
  "self": "http://subscriber.service.production.consul/subscriber/3572674791",
  "verification_time": null,
  "subscribe_method": "api",
  "stop_time": null,
  "list_id": 4385291,
  "latitude": null,
  "email": "reikiartlove@outlook.com",
  "status": "subscribed",
  "last_followup": 1001,
  "area_code": null,
  "followuptime": null,
  "org": null,
  "postal": null,
  "ip_address": null,
  "name": null,
  "lead_id": 78370684,
  "add_url": null,
  "country": null,
  "region": null,
  "unsubscribe_source": null,
  "longitude": null,
  "subscribed_at": "2020-09-25T01:58:52.823722-04:00",
  "stop_method": null,
  "unsubscribe_timestamp": null
}
{
  "isp": null,
  "subscriber_id": 3572674791,
  "legacy_name": null,
  "subscriber_source": null,
  "dma_code": null,
  "id": 3572674791,
  "custom_fields": {
    "sha1": null,
    "subscription date": null,
    "gclid": null,
    "timestamp": null,
    "receipt": null,
    "system": null,
    "birthday": null,
    "cbid": null
  },
  "subscriber_uuid": "f36b791d-db77-4bcb-8d12-579a52fcc10b",
  "city": null,
  "verified": 0,
  "self": "http://subscriber.service.production.consul/subscriber/3572674791",
  "verification_time": null,
  "subscribe_method": "api",
  "stop_time": null,
  "list_id": 4385291,
  "latitude": null,
  "email": "reikiartlove@outlook.com",
  "status": "subscribed",
  "last_followup": 1001,
  "area_code": null,
  "followuptime": null,
  "org": null,
  "postal": null,
  "ip_address": null,
  "name": null,
  "lead_id": 78370684,
  "add_url": null,
  "country": null,
  "region": null,
  "unsubscribe_source": null,
  "longitude": null,
  "subscribed_at": "2020-09-25T01:58:52.823722-04:00",
  "stop_method": null,
  "unsubscribe_timestamp": null
}
{
  "isp": null,
  "subscriber_id": 1018258237,
  "legacy_name": "",
  "subscriber_source": "just_creative_popup_",
  "dma_code": "0",
  "id": 1018258237,
  "custom_fields": {},
  "subscriber_uuid": "68afa958-7c60-42de-a87b-cb04c871a728",
  "city": "Porto Alegre",
  "verified": 1,
  "self": "http://subscriber.service.production.consul/subscriber/1018258237",
  "verification_time": "2015-05-06T09:44:08",
  "subscribe_method": "webform",
  "stop_time": null,
  "list_id": 3491206,
  "latitude": -30.0333,
  "email": "aferreira385@gmail.com",
  "status": "subscribed",
  "last_followup": 7,
  "area_code": "0",
  "followuptime": "2018-10-15T23:04:12.062433-04:00",
  "org": null,
  "postal": null,
  "ip_address": "177.6.1.131",
  "name": "",
  "lead_id": 55748845,
  "add_url": "http://justcreative.com/",
  "country": "Brazil",
  "region": "23",
  "unsubscribe_source": null,
  "longitude": -51.2,
  "subscribed_at": "2015-05-06T09:43:32-04:00",
  "stop_method": null,
  "unsubscribe_timestamp": null
}
  import io
  import json

  import fastavro
  import requests
  import pika

  # Download the subscriber.rebuild.v1 Avro schema document and parse it so it
  # can be handed to the fastavro writer below.
  schema_document = requests.get(
      "http://schema.aweberprod.com/avro/subscriber.rebuild.v1.avsc"
  ).json()
  schema = fastavro.parse_schema(schema_document)


  def subscribers():
      """Yield each subscriber record stored in the subscribers.jsons file.

      The file is read line by line; every line is decoded as one JSON
      document.  Lines containing the text "SubscriberNotFound" are skipped
      without being decoded.
      """
      path = "data/9c/f3c4cc-44a5-4ff4-bdf5-bc04b214a8e3/subscribers.jsons"
      with open(path, "r") as records:
          for record in records:
              if "SubscriberNotFound" not in record:
                  yield json.loads(record)


  # Publish one subscriber.rebuild.v1 message per subscriber record to the
  # "rpc" exchange.
  # NOTE(review): the broker credentials are embedded in this URL; consider
  # keeping them out of the notes if this file is shared.
  conn = pika.BlockingConnection(
      pika.URLParameters("amqp://admin:rabbitmq@rabbitmq.aweberprod.com:5672/%2F")
  )
  try:
      channel = conn.channel()
      for subscriber in subscribers():
          # Serialize the message as a schemaless Avro datum; the payload
          # carries only the subscriber's UUID.
          stream = io.BytesIO()
          fastavro.schemaless_writer(
              stream, schema, {"subscriber": subscriber["subscriber_uuid"]}
          )
          body = stream.getvalue()
          channel.basic_publish(
              exchange="rpc",
              routing_key="subscriber.rebuild",
              body=body,
              properties=pika.BasicProperties(
                  app_id="correl/1.0.0",
                  content_type="application/vnd.apache.avro.datum",
                  type="subscriber.rebuild.v1",
              ),
          )
  finally:
      # Release the connection even when encoding or publishing raises; the
      # is_open guard avoids a second error if the broker already dropped it.
      if conn.is_open:
          conn.close()

#+RESULTS: