Skip to content

Commit 722af34

Browse files
authored
refactor(airflow): refactor airflow containers (#200)
* refactor(airflow): refactor airflow containers * use compose-go instead of docker-compose (conda) * Add config for airflow version * configure executor to use postgres connection * Include python environments on airflow containers * install pyenvs via requirements.txt * owid DAG * Include EGH args on dockerfile to create DB connection config on airflow docker image * Finish OWID DAG * Update colombia DAG * Trying to send information through external tasks * remove the external in which was blocking the creation of other tasks, use requests instead * Finish FOPH metadata DAG * remove unnecessary env template
1 parent 6ebc799 commit 722af34

23 files changed

Lines changed: 2977 additions & 1699 deletions

.containers-sugar.yaml

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
version: 1.9.0
2+
compose-app: docker-compose
3+
env-file: .env
4+
5+
service-groups:
6+
- name: airflow
7+
project-name: egh-airflow
8+
compose-path:
9+
- containers/compose-airflow.yaml
10+
env-file: containers/airflow/.env
11+
services:
12+
default: webserver,scheduler,worker,triggerer
13+
available:
14+
- name: webserver
15+
- name: scheduler
16+
- name: worker
17+
- name: triggerer
18+
- name: airflow-cli

.env.tpl

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,8 @@ POSTGRES_PASSWORD=${POSTGRES_PASSWORD}
4242
POSTGRES_DB=${POSTGRES_DB}
4343
POSTGRES_DATA_DIR_HOST=${POSTGRES_DATA_DIR_HOST}
4444
POSTGRES_CONFIG_FILE_HOST=${POSTGRES_CONFIG_FILE_HOST}
45+
POSTGRES_EPIGRAPH_HOST=${POSTGRES_EPIGRAPH_HOST}
46+
POSTGRES_EPIGRAPH_PORT=${POSTGRES_EPIGRAPH_PORT}
4547
POSTGRES_EPIGRAPH_USER=${POSTGRES_EPIGRAPH_USER}
4648
POSTGRES_EPIGRAPH_PASSWORD=${POSTGRES_EPIGRAPH_PASSWORD}
4749
POSTGRES_EPIGRAPH_DB=${POSTGRES_EPIGRAPH_DB}

.github/workflows/main.yaml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,8 @@ env:
3939
POSTGRES_USER: postgres
4040
POSTGRES_PASSWORD: postgres
4141
POSTGRES_DB: postgres
42+
POSTGRES_EPIGRAPH_HOST: postgres
43+
POSTGRES_EPIGRAPH_PORT: 25432
4244
POSTGRES_EPIGRAPH_USER: dev_epigraph
4345
POSTGRES_EPIGRAPH_PASSWORD: dev_epigraph
4446
POSTGRES_EPIGRAPH_DB: dev_epigraphhub

conda/airflow.yaml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,11 @@
1+
# Note: these dependencies are for dev only. To work in a container, they have
2+
# to be in a virtual environment to run in an isolated Python version
13
name: epigraphhub
24
channels:
35
- nodefaults
46
- conda-forge
57
dependencies:
6-
- airflow 2.5.2
8+
- airflow 2.7.1
79
- fiona
810
- geopandas
911
- gsheetsdb
@@ -23,5 +25,3 @@ dependencies:
2325
- pip
2426
- pip:
2527
- -r pip.txt
26-
- epigraphhub
27-
- pysus

conda/base.yaml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,12 +5,13 @@ channels:
55
dependencies:
66
- python 3.9.*
77
- awscli
8-
- docker-compose
98
- git
109
- make
1110
- sqlite
1211
- webdriver-manager
1312
- pip
1413
- pip:
14+
- containers-sugar
15+
- compose-go
1516
- epigraphhub
1617
- "selenium<=4.0"

containers/airflow/Dockerfile

Lines changed: 70 additions & 94 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,4 @@
1-
# ref: https://github.com/mamba-org/micromamba-docker/blob/main/Dockerfile
2-
3-
FROM condaforge/mambaforge:latest
1+
FROM apache/airflow:2.7.1
42

53
LABEL maintainer="Ivan Ogasawara <ivan.ogasawara@gmail.com>"
64
LABEL org.opencontainers.image.title="EpiGraphHub"
@@ -13,15 +11,8 @@ LABEL org.thegraphnetwork.epigraphhub.version="latest"
1311
# it is the default, but it is set here to make it explicit
1412
USER root
1513

16-
SHELL ["/bin/bash", "-c"]
17-
# Use bash in Dockerfile RUN commands and make sure bashrc is sourced when
18-
# executing commands with /bin/bash -c
19-
# Needed to have the micromamba activate command configured etc.
20-
21-
ENV ENV_NAME=epigraphhub
2214
ENV DEBIAN_FRONTEND=noninteractive
23-
ARG UID=1000
24-
ARG GID=1000
15+
ARG AIRFLOW_UID
2516

2617
RUN apt-get update -y \
2718
&& apt-get install -y --no-install-recommends \
@@ -39,100 +30,85 @@ RUN apt-get update -y \
3930
ca-certificates \
4031
gnupg \
4132
dirmngr \
42-
freetds-bin \
43-
freetds-dev \
44-
gosu \
45-
ldap-utils \
46-
libffi-dev \
47-
libpq-dev \
48-
libsasl2-2 \
49-
libsasl2-dev \
50-
libsasl2-modules \
51-
libssl-dev \
52-
locales \
53-
lsb-release \
54-
nodejs \
55-
openssh-client \
56-
postgresql-client \
57-
sasl2-bin \
58-
software-properties-common \
59-
sqlite3 \
60-
sudo \
61-
unixodbc \
62-
unixodbc-dev \
63-
yarn \
6433
vim \
34+
libssl-dev \
35+
liblzo2-dev \
36+
libpam0g-dev \
37+
zlib1g-dev \
38+
libffi-dev \
39+
libbz2-dev \
40+
libsqlite3-dev \
6541
&& rm -rf /var/lib/apt/lists/* \
6642
/var/cache/apt/archives \
67-
/tmp/* \
68-
&& addgroup --gid ${GID} epigraphhub \
69-
&& useradd --uid ${UID} --gid ${GID} -ms /bin/bash epigraphhub \
70-
&& mkdir -p /opt/EpiGraphHub \
71-
&& chmod -R a+rwx /opt/conda /opt/EpiGraphHub \
72-
&& export ENV_NAME="$ENV_NAME" \
73-
&& echo "epigraphhub ALL=(ALL) NOPASSWD: ALL" > /etc/sudoers.d/epigraphhub \
74-
&& chmod 0440 /etc/sudoers.d/epigraphhub \
75-
&& mkdir -p /opt/superset \
76-
&& chown epigraphhub:epigraphhub /opt/superset \
77-
&& chmod a+rw /var/log/
78-
79-
USER epigraphhub
80-
81-
WORKDIR /opt/EpiGraphHub
82-
83-
COPY --chown=epigraphhub:epigraphhub conda/ /tmp/conda
84-
85-
ENV PATH /opt/conda/envs/$ENV_NAME/bin:$PATH
86-
ENV PYTHONPATH='/opt/superset:/opt/EpiGraphHub'
87-
ENV ANSIBLE_CONFIG='/opt/EpiGraphHub/playbooks/ansible.cfg'
88-
89-
RUN mamba env create -n $ENV_NAME --file /tmp/conda/airflow.yaml \
90-
&& conda clean --all \
91-
&& find /opt/conda/ -type f,l -name '*.a' -delete \
92-
&& find /opt/conda/ -type f,l -name '*.pyc' -delete \
93-
&& find /opt/conda/ -type f,l -name '*.js.map' -delete \
94-
&& rm -rf /opt/conda/pkgs /tmp/*
95-
96-
# note: keeping it to the end of the recipes helps to avoid rebuilding the
97-
# image after every change.
98-
# COPY --chown=epigraphhub:epigraphhub . /opt/EpiGraphHub
99-
100-
COPY --chown=epigraphhub:epigraphhub containers/superset/superset.sh /opt/superset.sh
101-
# note: these files can be overwritten by docker compose volumes in order to
102-
# use the last version without building the image again.
103-
COPY --chown=epigraphhub:epigraphhub containers/superset/ /opt/superset
104-
COPY --chown=epigraphhub:epigraphhub containers/superset/entrypoint.sh /opt/entrypoint.sh
105-
106-
RUN chmod +x /opt/entrypoint.sh \
107-
&& echo "source /opt/entrypoint.sh" > ~/.bashrc \
108-
&& sudo mkdir -p /opt/data/superset/ \
109-
&& sudo chown -R epigraphhub:epigraphhub /opt/data \
110-
&& sudo chown -R epigraphhub:epigraphhub /var/log/*
111-
112-
# note: the steps above were copied from the superset + some apt deps
113-
# needed by airflow
114-
115-
# ref: https://hub.docker.com/r/apache/airflow/dockerfile
116-
117-
ENV AIRFLOW_HOME=/opt/airflow
118-
ENV DEBIAN_FRONTEND=noninteractive
43+
/tmp/*
44+
45+
RUN usermod -u ${AIRFLOW_UID} -g 0 -d /home/airflow -s /bin/bash airflow \
46+
&& echo "airflow ALL=(ALL) NOPASSWD: ALL" > /etc/sudoers.d/airflow \
47+
&& chmod 0440 /etc/sudoers.d/airflow \
48+
&& mkdir -p ${AIRFLOW_HOME}/scripts /opt/envs \
49+
&& chown -R ${AIRFLOW_UID}:0 ${AIRFLOW_HOME} /opt/envs/
50+
51+
RUN curl https://www.python.org/ftp/python/3.10.8/Python-3.10.8.tgz -o /tmp/Python-3.10.8.tgz \
52+
&& tar -zxvf /tmp/Python-3.10.8.tgz -C /tmp \
53+
&& cd /tmp/Python-3.10.8 \
54+
&& ./configure --prefix=/opt/py310 --enable-optimizations \
55+
&& make install \
56+
&& chown -R airflow /opt/py310 \
57+
&& echo "alias python3.10=/opt/py310/bin/python3.10" >> /home/airflow/.bashrc \
58+
&& rm -rf /tmp/Python-3.10*
59+
60+
RUN curl https://www.python.org/ftp/python/3.11.6/Python-3.11.6.tgz -o /tmp/Python-3.11.6.tgz \
61+
&& tar -zxvf /tmp/Python-3.11.6.tgz -C /tmp \
62+
&& cd /tmp/Python-3.11.6 \
63+
&& ./configure --prefix=/opt/py311 --enable-optimizations \
64+
&& make install \
65+
&& chown -R airflow /opt/py311 \
66+
&& echo "alias python3.11=/opt/py311/bin/python3.11" >> /home/airflow/.bashrc \
67+
&& rm -rf /tmp/Python-3.11*
68+
69+
COPY --chown=airflow containers/airflow/config/airflow.cfg ${AIRFLOW_HOME}/airflow.cfg
70+
COPY --chown=airflow containers/airflow/scripts/*.sh ${AIRFLOW_HOME}/scripts/
71+
COPY --chown=airflow containers/airflow/scripts/entrypoint.sh /opt/entrypoint.sh
72+
COPY --chown=airflow containers/airflow/envs/* /opt/envs/
73+
74+
USER airflow
75+
76+
ARG POSTGRES_EPIGRAPH_HOST
77+
ARG POSTGRES_EPIGRAPH_PORT
78+
ARG POSTGRES_EPIGRAPH_USER
79+
ARG POSTGRES_EPIGRAPH_PASSWORD
80+
ARG POSTGRES_EPIGRAPH_DB
81+
ENV DB_USER "${POSTGRES_EPIGRAPH_USER}:${POSTGRES_EPIGRAPH_PASSWORD}"
82+
ENV DB_URI "${DB_USER}@${POSTGRES_EPIGRAPH_HOST}:${POSTGRES_EPIGRAPH_PORT}/${POSTGRES_EPIGRAPH_DB}"
83+
84+
RUN /usr/local/bin/python -m virtualenv /opt/envs/py310 --python="/opt/py310/bin/python3.10" \
85+
&& sed -i "s/include-system-site-packages = false/include-system-site-packages = true/" /opt/envs/py310/pyvenv.cfg \
86+
&& source /opt/envs/py310/bin/activate \
87+
&& pip install "cython<3.0.0" \
88+
&& pip install --no-build-isolation "pyyaml<6.0" \
89+
&& pip install -r /opt/envs/epigraphhub.txt \
90+
&& epigraphhub-config --name "epigraphhub" --db-uri "${DB_URI}"
91+
92+
RUN /usr/local/bin/python -m virtualenv /opt/envs/py311 --python="/opt/py311/bin/python3.11" \
93+
&& sed -i "s/include-system-site-packages = false/include-system-site-packages = true/" /opt/envs/py311/pyvenv.cfg \
94+
&& source /opt/envs/py311/bin/activate \
95+
&& pip install "cython<3.0.0" \
96+
&& pip install --no-build-isolation "pyyaml<6.0" \
97+
&& pip install -r /opt/envs/pysus.txt
98+
99+
WORKDIR ${AIRFLOW_HOME}
119100

120101
# ref: https://stackoverflow.com/questions/44331836/apt-get-install-tzdata-noninteractive
121102
RUN sudo ln -fs /usr/share/zoneinfo/America/New_York /etc/localtime
122103

123104
RUN sudo mkdir -p /opt/scripts /sources /opt/airflow \
124-
&& sudo chown -R epigraphhub:epigraphhub /opt/scripts \
125-
&& sudo chown -R epigraphhub:epigraphhub /sources \
126-
&& sudo chown -R epigraphhub:epigraphhub /opt/airflow \
105+
&& sudo chown -R airflow /opt/scripts \
106+
&& sudo chown -R airflow /sources \
107+
&& sudo chown -R airflow /opt/airflow \
127108
&& sudo touch /var/log/owid_fetch.log \
128109
&& sudo touch /var/log/foph_fetch.log \
129110
&& sudo touch /var/log/colombia_fetch.log \
130-
&& sudo chown -R epigraphhub:epigraphhub /var/log/*
131-
132-
COPY --chown=epigraphhub ./containers/airflow/airflow.cfg /opt/airflow/airflow.cfg
133-
COPY --chown=epigraphhub ./containers/airflow/scripts/*.sh /opt/scripts/
134-
COPY --chown=epigraphhub ./containers/airflow/scripts/entrypoint.sh /opt/entrypoint.sh
135-
COPY --chown=epigraphhub ./containers/airflow/scripts/webserver_config.py /opt/airflow/webserver_config.py
111+
&& sudo chown -R airflow /var/log/*
136112

137113
ENTRYPOINT [ "/opt/entrypoint.sh" ]
138114
CMD /opt/scripts/startup.sh

containers/airflow/README.md

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
Building Airflow:
2+
```sh
3+
sugar build --group airflow
4+
```
5+
6+
Starting containers:
7+
```sh
8+
sugar up --options -d --group airflow
9+
```

0 commit comments

Comments
 (0)