Commit 02cad29

add table update trigger example
1 parent 54a2dfa commit 02cad29

6 files changed

Lines changed: 152 additions & 0 deletions

File tree

* README.md
* databricks.yml
* pyproject.toml
* resources/__init__.py
* resources/table_update.py
* src/assets/consume_table.py

README.md
Lines changed: 68 additions & 0 deletions
# pydabs_job_table_update_trigger

This example demonstrates a workflow where producers write to Unity Catalog tables and consumers trigger on table updates instead of time-based schedules.

The Lakeflow Job is configured with:
- **Table update trigger**: Runs the job when new data is ready, without a continuously running cluster or knowledge of the processes that update the table.
- **Configurable wait times**:
  - Minimum time between triggers: 0 seconds
  - Wait after last table change: 3600 seconds
- **Automatic processing**: When updates are detected, the job automatically runs and processes them.

* `src/`: Notebook source code for this project.
* `src/assets/consume_table.py`: Notebook that reads the monitored table when the job runs.
* `resources/`: Resource configurations (jobs, pipelines, etc.)
* `resources/table_update.py`: PyDABs job with table update trigger configuration.

## Getting started

Choose how you want to work on this project:

(a) Directly in your Databricks workspace, see
    https://docs.databricks.com/dev-tools/bundles/workspace.

(b) Locally with an IDE like Cursor or VS Code, see
    https://docs.databricks.com/vscode-ext.

(c) With command line tools, see https://docs.databricks.com/dev-tools/cli/databricks-cli.html

If you're developing with an IDE, dependencies for this project should be installed using uv:

* Make sure you have the uv package manager installed.
  It's an alternative to tools like pip: https://docs.astral.sh/uv/getting-started/installation/.
* Run `uv sync --dev` to install the project's dependencies.

## Using this project from the CLI

The Databricks workspace and IDE extensions provide a graphical interface for working
with this project. It's also possible to interact with it directly using the CLI:

1. Authenticate to your Databricks workspace, if you have not done so already:

   ```
   $ databricks configure
   ```

2. To deploy a development copy of this project, type:

   ```
   $ databricks bundle deploy --target dev
   ```

   (Note that "dev" is the default target, so the `--target` parameter
   is optional here.)

   This deploys everything that's defined for this project.
   For example, this project will deploy a job called
   `[dev yourname] table_update_example` to your workspace.
   You can find that resource by opening your workspace and clicking on **Jobs & Pipelines**.

3. Development vs. production behavior:
   - Dev target (mode: development): Schedules and automatic triggers are disabled by design, so the job will not auto-fire on table updates. Run it manually to test the logic:

     ```
     $ databricks bundle run table_update_example
     ```
   - Prod target (mode: production): Automatic triggers are active. An update to the configured Unity Catalog table (`main.analytics.daily_events`) will trigger a job run when the trigger evaluates; see the example below.

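Any committed write to the monitored table (an INSERT, MERGE, or append) counts as an update. As a minimal sketch, a producer-side append along the following lines would make the trigger fire once the configured quiet period has elapsed; the columns shown are purely illustrative, and it assumes it runs in a Databricks notebook where `spark` is already defined:

```
from pyspark.sql import Row

# Hypothetical producer: append one row to the monitored table.
# The columns are illustrative; any committed write counts as a table update.
spark.createDataFrame([Row(event_id=1, event_type="page_view")]) \
    .write.mode("append").saveAsTable("main.analytics.daily_events")
```
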
databricks.yml
Lines changed: 21 additions & 0 deletions
# This is a Databricks asset bundle definition for pydabs_job_table_update_trigger.
# See https://docs.databricks.com/dev-tools/bundles/index.html for documentation.
bundle:
  name: pydabs_job_table_update_trigger

python:
  venv_path: .venv
  # Functions called to load resources defined in Python. See resources/__init__.py
  resources:
    - "resources:load_resources"

include:
  - resources/*.yml
  - resources/*/*.yml

targets:
  dev:
    mode: development
    default: true
    workspace:
      host: https://myworkspace.databricks.com
pyproject.toml
Lines changed: 26 additions & 0 deletions
[project]
name = "pydabs_job_table_update_trigger"
version = "0.0.1"
authors = [{ name = "Databricks Field Engineering" }]
requires-python = ">=3.10,<=3.13"
dependencies = [
    # Any dependencies for jobs and pipelines in this project can be added here
    # See also https://docs.databricks.com/dev-tools/bundles/library-dependencies
    #
    # LIMITATION: for pipelines, dependencies are cached during development;
    # add dependencies to the 'environment' section of pipeline.yml file instead
]

[dependency-groups]
dev = [
    "pytest",
    "databricks-connect>=15.4,<15.5",
    "databricks-bundles==0.275.0",
]

[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"

[tool.black]
line-length = 125
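Since the dev dependency group pulls in pytest and Databricks Connect, a conftest.py along these lines could provide a remote Spark session for local tests. This is a sketch, not part of this commit, and it assumes Databricks authentication is already configured (for example via `databricks configure`):

```
# conftest.py (hypothetical, not part of this commit)
import pytest


@pytest.fixture(scope="session")
def spark():
    # Databricks Connect builds a remote SparkSession from your configured
    # Databricks credentials.
    from databricks.connect import DatabricksSession

    return DatabricksSession.builder.getOrCreate()
```
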
resources/__init__.py
Lines changed: 16 additions & 0 deletions
from databricks.bundles.core import (
    Bundle,
    Resources,
    load_resources_from_current_package_module,
)


def load_resources(bundle: Bundle) -> Resources:
    """
    The 'load_resources' function is referenced in databricks.yml and is responsible for loading
    bundle resources defined in Python code. It is called by the Databricks CLI during
    bundle deployment; after deployment, it is not used.
    """

    # The default implementation loads all Python files in the 'resources' directory.
    return load_resources_from_current_package_module()
resources/table_update.py
Lines changed: 16 additions & 0 deletions
from databricks.bundles.jobs import Job, NotebookTask, Task, TableUpdateTriggerConfiguration

# Task that runs the consumer notebook whenever the trigger fires.
consume_table = Task(
    task_key="consume_table",
    notebook_task=NotebookTask(notebook_path="src/assets/consume_table.py"),
)

job = Job(
    name="table_update_example",
    trigger=TableUpdateTriggerConfiguration(
        table_names=["main.analytics.daily_events"],
        # No enforced minimum gap between consecutive triggered runs.
        min_time_between_triggers_seconds=0,
        # Wait until the table has seen no new updates for an hour before running.
        wait_after_last_change_seconds=3600,
    ),
    tasks=[consume_table],
)
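A small unit test could guard this job definition against accidental changes. The sketch below is illustrative and not part of this commit: the test file name is hypothetical, it assumes the `resources` package is importable from the test environment, and the assertions rely on the dataclass fields mirroring the constructor arguments shown above:

```
# tests/test_table_update_job.py (hypothetical, not part of this commit)
from resources.table_update import consume_table, job


def test_job_wires_up_consumer_task():
    assert job.name == "table_update_example"
    assert [t.task_key for t in job.tasks] == ["consume_table"]
    assert consume_table.notebook_task.notebook_path == "src/assets/consume_table.py"
```

With the dev dependencies installed, it could be run with `uv run pytest` from the project root.
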
src/assets/consume_table.py
Lines changed: 5 additions & 0 deletions
from pyspark.sql import functions as F

source_table = "main.analytics.daily_events"

# Insert consumer logic here. `spark` is provided by the Databricks notebook runtime.
df = spark.read.table(source_table)
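As an illustration of what the consumer step might do once the trigger fires, a sketch along these lines could replace the placeholder above. The `event_type` column and the `main.analytics.daily_event_counts` target table are hypothetical, and `spark` is assumed to be the notebook-provided session:

```
from pyspark.sql import functions as F

# Hypothetical consumer logic: count rows per event type in the freshly
# updated source table and overwrite a small downstream summary table.
df = spark.read.table("main.analytics.daily_events")
summary = df.groupBy("event_type").agg(F.count("*").alias("event_count"))
summary.write.mode("overwrite").saveAsTable("main.analytics.daily_event_counts")
```
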

0 commit comments
