02 - Processing Change Data Capture - Example

CDC Feed Example

Step 1: Create the target table with CDF enabled

CREATE TABLE customer_info (
  customer_id INT,
  name STRING,
  email STRING,
  last_updated TIMESTAMP
)
USING delta
TBLPROPERTIES (delta.enableChangeDataFeed = true);

Step 2: Insert initial data

INSERT INTO customer_info VALUES
  (1, 'John Doe', 'john@example.com', current_timestamp()),
  (2, 'Jane Smith', 'jane@example.com', current_timestamp());

Step 3: Create a source table with updated data

CREATE TABLE customer_updates (
  customer_id INT,
  name STRING,
  email STRING
)
USING delta;

INSERT INTO customer_updates VALUES
  (1, 'John Doe', 'john.doe@newmail.com'),  -- Updated email
  (2, 'Jane Smith', 'jane@example.com'),    -- No change
  (3, 'Alice Johnson', 'alice@example.com'); -- New customer

Step 4: Perform the MERGE Operation

MERGE INTO customer_info AS target
USING customer_updates AS source
ON target.customer_id = source.customer_id
WHEN MATCHED THEN
  UPDATE SET
    target.name = source.name,
    target.email = source.email,
    target.last_updated = current_timestamp()
WHEN NOT MATCHED THEN
  INSERT (customer_id, name, email, last_updated)
  VALUES (source.customer_id, source.name, source.email, current_timestamp());