import React from 'react';
import ServiceNavigation from '../../../ServiceNavigation';
import {
    AppLayout,
    Button,
    ColumnLayout,
    FormField,
    Icon,
    Select,
    Container,
    Header,
    Box,
    Grid,
    SpaceBetween,
    Link
} from '../../../../aws-ui-components';
import '../../../../styles/servicehomepage.scss';

import MarkdownRender from "../../../utils/MarkdownRender";

// Component ServiceHomepage is a skeleton of a service's homepage using AWS-UI React components.
export default () => {
    return (
        <AppLayout
            disableContentPaddings={true}
            navigation={<ServiceNavigation />} // Navigation panel content imported from './ServiceNavigation.jsx'
            content={<Content />}
            contentType="default"
            navigationOpen={true}
            toolsHide={true}
        />
    );
};

// The content in the main content area of the App layout
const Content = () => (
    <Box padding="s" margin={{bottom: 'l'}}>
        <h1>Route 53 DOCtor: Control Plane</h1>
        <h4>Last Updated: April 19th, 2024</h4>

        <Box padding={{top: 'l'}}>
            <Grid
                gridDefinition={[
                    {colspan: {l:8, s:10, xxs: 12}, offset: {l:2, s: 1, xxs: 0}},
                ]}
            >
                <SpaceBetween size="l">
                    <Container>
                        <MarkdownRender images={imagePaths} markdown={markdown} />
                    </Container>
                </SpaceBetween>
            </Grid>
        </Box>
    </Box>
);


// The Images
import myImage from "../../../../data/images/logo512.png"

import celestial_view from "../../../../data/r53dr-drawings/documents/doctor-control-plane-final-design/v1/celestial-view.excalidraw.png"
import explain_backup_region_1 from "../../../../data/r53dr-drawings/documents/doctor-control-plane-final-design/v1/Control Plane/backup regions/simple-example-001.excalidraw.png"
import explain_backup_region_4 from "../../../../data/r53dr-drawings/documents/doctor-control-plane-final-design/v1/Control Plane/backup regions/simple-example-002.excalidraw.png"
import detailed_customer_plane from "../../../../data/r53dr-drawings/documents/doctor-control-plane-final-design/v2/detailed-customer-plane.excalidraw.png"
import zoom_out_cell_design from "../../../../data/r53dr-drawings/documents/doctor-control-plane-final-design/v2/zoomed-out-cell-design.excalidraw.png"
import detailed_cell_design from "../../../../data/r53dr-drawings/documents/doctor-control-plane-final-design/v2/detailed-cell-design.excalidraw.png"

import creation_process_1 from "../../../../data/r53dr-drawings/documents/doctor-control-plane-final-design/v2/Creation Process/example-001.excalidraw.png"
import creation_process_2 from "../../../../data/r53dr-drawings/documents/doctor-control-plane-final-design/v2/Creation Process/example-002.excalidraw.png"
import creation_process_3 from "../../../../data/r53dr-drawings/documents/doctor-control-plane-final-design/v2/Creation Process/example-003.excalidraw.png"
import creation_process_4 from "../../../../data/r53dr-drawings/documents/doctor-control-plane-final-design/v2/Creation Process/example-004.excalidraw.png"
import creation_process_5 from "../../../../data/r53dr-drawings/documents/doctor-control-plane-final-design/v2/Creation Process/example-005.png"
import creation_process_6 from "../../../../data/r53dr-drawings/documents/doctor-control-plane-final-design/v2/Creation Process/example-006.excalidraw.png"
import creation_process_7 from "../../../../data/r53dr-drawings/documents/doctor-control-plane-final-design/v2/Creation Process/example-007.excalidraw.png"



// Lookup table passed to MarkdownRender through its images prop. The keys match
// the identifiers used in the markdown image syntax, e.g. ![Replace Image](celestial_view);
// presumably MarkdownRender swaps each identifier for the imported asset — confirm
// against MarkdownRender. Property shorthand is used because every key is
// identical to the name of the imported binding it maps to.
const imagePaths = {
    // Overview and layout diagrams
    celestial_view,
    explain_backup_region_1,
    explain_backup_region_4,
    detailed_customer_plane,
    zoom_out_cell_design,
    detailed_cell_design,

    // CreateDoctet walkthrough diagrams
    creation_process_1,
    creation_process_2,
    creation_process_3,
    creation_process_4,
    creation_process_5,
    creation_process_6,
    creation_process_7,

    // Placeholder logo; not referenced by the markdown in this file.
    myImage,
};

const markdown = `
# What is DOCtor?

DNS Operational Controller (DOCtor) is a managed infrastructure service for whale internal teams. DOCtor propagates an unsigned byte of information to all DNS servers with 100% availability. DOCtor’s architecture is laid out in five different sections: Customer Control Plane, Customer Data Plane, Cell Plane, Data Plane Egress, and Integration Partner. This document covers only the Customer Control Plane and Cell Plane. First, we will establish a baseline understanding of what a DOCtet is. Then we will explore the overall layout of the Customer Control Plane and Cell Plane. Next, we will explain the interaction between the Customer and Cell Planes. Then, we will follow the workflow of a DOCtet through its creation process. Finally, we will deep dive into the design of each service component.
{{center start}} 
![Replace Image](celestial_view)

High level interaction of DOCtor Components
{{center end}}

# What is DOCtet?

DOCtet is a play on the word “octet”, coined by Route53 Healthcast engineer Jeremy Mercer. A DOCtet is the resource that customers create when they use DOCtor. The reference to a DOCtet is its DOCtet ID, which is a special UUID that has data embedded within it. Customers reference their DOCtet by taking the DOCtet ID and placing it into Route 53 RRsets in the same place that Route 53 Health Checks currently go.

DOCtor being a regional service, customers must specify the region they want their DOCtet to be created in - all future updates and deletes to this DOCtet's configuration will need to come through this region. This is considered to be the primary region for this DOCtet. A customer must specify a few configuration values when a DOCtet is created. The most important parameter (and one which cannot change later) is the "backup regions" for the DOCtet. These backup regions are additional regions that will be used to provide data plane redundancy. When a DOCtet is created, a UUID is returned to the customer that they will use for further interaction with their DOCtet.

Customers use the DOCtet ID as they would use a Route 53 Health Check ID. The DOCtet ID enables customers to configure failover strategies, which can automatically route traffic away from unhealthy resources to healthy ones based on the DOCtet’s value. Another application of a DOCtet is the feature called “evaluate target health”, which prioritizes routing decisions during resolution so that traffic is routed to healthy resources.


Here is an example of how the customer “Me” sets up a DOCtet. Here I created a DOCtet in IAD with the “backup regions” being PDX and XYZ. I associated my DOCtet with an RRSet pointing to the domain www.bashscri.com and set it up so that, when the DOCtet is receiving a Healthy Signal, DNS resolution for the domain points to iad.bashscri.com.
{{center start}} 
![Replace Image](explain_backup_region_1)

Example 1 of using a DOCtet in a “Happy Case”
{{center end}}

Now we are going to explore what happens if I need to trigger a failover while the IAD region is experiencing network issues. Because I configured my DOCtet with multiple backup regions, I can simply send the Unhealthy signal through the DOCtor system in those regions; DNS servers globally will then get the updated DOCtet status and start answering DNS queries with dub.bashscri.com.

{{center start}} 
![Replace Image](explain_backup_region_4)

Example 2 of how a DOCtet works if the primary region was to go down
{{center end}}

# Customer Control Plane Layout

The Customer Control Plane is responsible for handling the interface between the customers and their DOCtet’s configuration. Within each region there are two service components, the Customer API and Customer API Canary. 

A system can be designed around different constructs: global, regional, or zonal. The Customer Control Plane is designed to be regional. This means that each IA region within a partition will be running the two service components. The Customer API and Customer API Canary communicate purely within a region.

{{center start}} 
![Replace Image](detailed_customer_plane)

Layout of the Customer Control Plane
{{center end}}

Customers will use the Customer API to Create, Update, Delete, List, and Get their DOCtet’s configuration. The Customer API stores the association between Caller Reference and DOCtet in a Managed Journal Database (Managed Journal DB), and also keeps track of policy limits and throttle rules. The Customer API Canary constantly interacts with the Customer API, testing all the customer operations.

# Cell Control Plane Layout

In this section we will look at the Cell Plane, specifically a single cell within it. Each cell contains an isolated control plane and database. A cell is responsible for holding a fixed number of DOCtets: 2^22 (~4.2M). The cellular architecture provides regional isolation and horizontal scalability. The component services running within a cell’s Control Plane are: External Control API, Internal Control API, Delta Service, Checkpoint Service, and Propagation Canary.

Each Cell is responsible for holding only ~4.2M DOCtets. A major deciding factor for this constraint is that service components that are ‘small’ and deal with a finite set of data are easier to maintain and don’t suffer the same limitations and obstacles as a large scaled system. Engineers can make simpler decisions which reduces the complexity. This way, overhead generated by horizontally scaling our system is very minimal. 

A cell is regionally isolated and can easily be horizontally scaled. Since the launch of Route 53's Health Checks, the Health Checks team has been dealing with issues around expanding capacity. The Health Checks system was originally designed for a small number of Health Checks (~1M), but has expanded to 50M and is still growing. Over the years, AWS has experienced a few COEs, which have made it re-evaluate its regional availability posture. One of the ways internal teams decided to increase availability was to utilize the failover feature within Route 53 DNS. This caused the usage of Route 53 Health Checks to greatly increase, and it is now reaching the limit of how far it can be horizontally scaled. DOCtor's team, originating from the Route 53 Health Check team, decided to address some of the initial shortcomings of the Route 53 Health Check design. Because all DOCtor "resources" (DOCtets) are constrained to a cell, to add more capacity, we only need to build more cells within the capacity-constrained region.

Route 53, and most of Amazon, follow the philosophy of fully isolated Control and Data Planes. This means that if your Control Plane were to go down, it would not affect your Data Plane. For a cell, the Control Plane handles the existence of a DOCtet as well as its configuration, whereas the Data Plane's job is to provide a data transfer pipe within a region for a DOCtet. Both are fully independent.

{{center start}} 
![Replace Image](zoom_out_cell_design)

Layout of a region within the Cell Plane
{{center end}}

Within a cell control plane there are five services: External Control API, Internal Control API, Delta Service, Checkpoint Service, and Propagation Canary. The External Control API is the interface between the Customer API and a cell; it is responsible for handling underlying DOCtet operations, such as Create, Update, Delete, Get, and List. It stores the DOCtet’s configuration and throttle rules within the cell’s Journal DB. The Internal Control API is used for internal operations and directly communicates with the Journal DB. The Delta Service converts the data coming from the Journal Stream to JSON files and uploads them to S3. The Checkpoint Service reads JSONs from S3 and creates a collapsed snapshot of the latest DOCtet configuration for all ~4.2M DOCtets. The Propagation Canary tests the External Control API operations by generating DOCtets, updating them, and measuring how long the updates take to propagate.


{{center start}} 
![Replace Image](detailed_cell_design)

Layout of a single cell within the Cell Plane
{{center end}}

# Customer and Cell’s Control Plane Interactions

In this section we will explore the interface between customers, the Customer API, the cell's External Control API, and Delta Service. Access to the Customer API requires a valid AWS Account with the proper IAM permissions. Access is further restricted by an on-disk “Allow Listing” that gets updated during code deployments. The call rate of customer operations against the Customer API is regulated by Coral Throttle. The settings for Coral Throttle, customer policy limits, and DOCtet Caller Reference associations exist within a Managed Journal Database. When a DOCtet is being created, the Customer API calls the External Control API’s GetCellStats to figure out which cell has capacity, and then directs the call to that cell, which returns the DOCtet ID. All DOCtets have a special UUID with data embedded in it, which the Customer API uses to direct the other DOCtet operations such as Update, Delete, Get, and List directly to the proper cell’s External Control API. DOCtets are created with immutable configuration, like the backup regions, and mutable configuration, which controls the behavior of the DOCtet’s value through the data plane when there is an issue.

The Customer API’s interactions with a cell go through the External Control API. Calling the External Control API also requires a valid AWS Account and IAM permissions. The External Control API also uses an on-disk “Allow Listing” that gets updated during code deployments. The GetCellStats call made by the Customer API returns the remaining capacity of the cell. All other DOCtet operations on the External Control API interface with a Managed Journal Database to store and modify their configuration, and fire “topics” to communicate with Journal DB Streaming. Data is then returned to the Customer API based on the operation it was performing.

The Delta Service is a service that subscribes to events from the Journal DB Stream. These events from the Journal DB Stream are filtered specifically for "DOCtet topics". A DOCtet topic contains a snapshot of the configuration and operation that the customer has submitted to the Customer API. This snapshot is assigned a “Change Number” to denote how many changes it has seen. Next, Delta Service calculates a rolling checksum keeping track of all the live data within a cell. Finally, Delta Service packages this information into a single string and appends it to a file based on change number and uploads it to S3.

# An example of CreateDoctet’s Workflow!!

Meet Timmy. Timmy is a customer of Route 53 and he would like to set up a DOCtet in IAD with a backup region in PDX to control failover for his domain. He also wants to set the DOCtet’s configuration to fail-open if he fails to send DOCtet values for five minutes. For the sake of this example, let’s assume that everything is set up correctly within Route 53 and the only thing Timmy needs is a DOCtet to associate with his RRSet.

The first thing Timmy needs to do is give his AWS Account ID or Service Principal to a member of the DOCtor team to be allowlisted within the IAD region.

{{center start}} 
![Replace Image](creation_process_1)
{{center end}}

As the DOCtor team member said, Timmy is now good to create a DOCtet within IAD with whichever backup regions he desires (PDX for this example). Timmy then calls CreateDoctet on the Customer API endpoint within IAD using the credentials from the AWS Account ID we previously allowlisted, with the following payload:

{{center start}} 
![Replace Image](creation_process_2)
{{center end}}

When the request hits the Customer API, it verifies that the SigV4 credentials are valid and then communicates with ARPS to validate that the caller has a policy granting the “r53doctor:CreateDoctet” permission.

{{center start}} 
![Replace Image](creation_process_3)
{{center end}}

Then, the Customer API validates a medley of Throttle Rules.

{{center start}} 
![Replace Image](creation_process_5)
{{center end}}

Next, the Customer API follows this flow:

{{center start}} 
![Replace Image](creation_process_4)
{{center end}}

Here is the workflow of the “Call External Control API’s CreateDoctet in Target Cell” step. The External Control API randomly picks an available Sequence Number within the cell, generates a DOCtet ID, and then starts a Serialized Journal DB Transaction that stores this DOCtet’s configuration in its Managed Journal Database and fires a “Topic” containing all the DOCtet’s information directly to the Managed Journal Database Ledger. Should the transaction fail to commit, then within the same External Control API CreateDoctet operation it picks a different available Sequence Number and tries again; it repeats this process up to thirty times before giving up.

{{center start}} 
![Replace Image](creation_process_6)
{{center end}}

The DOCtet ID generated by the External Control API is then sent to the Customer API. The Customer API returns it to Timmy, who can then use this newly created DOCtet ID within an RRSet! This isn't the end of the story, however, as there are some eventually consistent operations that need to be carried out.

The Delta Service is subscribed to the Journal DB Streaming Service, listening for DOCtet topics. When it sees a new Topic, it repackages that information into a JSON string, places it within a file, and uploads this file to its cell’s S3 bucket.

{{center start}} 
![Replace Image](creation_process_7)
{{center end}}

`;