Infra/Airflow

[Airflow] ubuntu 18, python 3.6 airflow

sonny.kim 2021. 10. 19. 10:03

Dependencies

sudo apt-get update
sudo apt-get install -y \
  python3-pip \
  postgresql \
  postgresql-contrib \
  redis \
  nginx

Configure PostgreSQL

sudo -u postgres bash -c "createdb airflow"
sudo -u postgres bash -c "createuser airflow --pwprompt"

Create airflow user

sudo adduser airflow

su airflow
cd ~/

Install airflow (pip)

pip3 install --upgrade pip
pip3 install --user apache-airflow[postgres,celery,redis] email_validator

echo "export PATH=$PATH:$HOME/.local/bin" >> ~/.bashrc
source ~/.bashrc

#check airflow version
airflow version  # -> 2.2.0

Configure Airflow

vi ~/airflow/airflow.cfg

[core]

# Connection string to the local Postgres database
sql_alchemy_conn = postgresql+psycopg2://airflow:password@localhost:5432/airflow

# Class name of the executor
executor = CeleryExecutor
load_examples = False


[webserver]

# Run a single gunicorn process for handling requests.
workers = 1

# Require password authentication to the webserver
authenticate = True
rbac = True


[celery]

broker_url = redis://localhost:6379/0
result_backend = db+postgresql://airflow:password@localhost:5432/airflow

Airflow init db

airflow db init

Create airflow admin user

airflow users create \
  --username <your name> \
  --role Admin \
  --email <your email> \
  --firstname <your first name> \
  --lastname <your last name> \
  --password <your password>

Start airflow services

# start service
airflow webserver --daemon
airflow scheduler --daemon
airflow celery worker --daemon

# kill all "airflow" processes
sudo kill $(ps -ef | grep "airflow" | awk '{print $2}')

Configure NGINX (run as a normal sudo-capable user, not the airflow user)

sudo vi /etc/nginx/sites-available/airflow

server {
  listen 80;
  listen [::]:80;

  # Replace with your own server's URL and/or IP address. Note that
  # supplying a single value, such as an IP address, is okay.
  server_name airflow.corbettanalytics.com 34.218.228.207;

  location / {
    proxy_pass http://0.0.0.0:8080;
  }
}

sudo ln -s /etc/nginx/sites-available/airflow /etc/nginx/sites-enabled

If you want to use a custom domain, also set the server-name hash bucket size in nginx.conf:

sudo vi /etc/nginx/nginx.conf
http {
     server_names_hash_bucket_size 256;
}

Optional: enforce HTTPS only

 

sudo apt-get install software-properties-common
sudo add-apt-repository universe
sudo add-apt-repository ppa:certbot/certbot
sudo apt-get update

sudo certbot --nginx

 

Deeper...

버킷플레이스 Airflow 도입기 - 오늘의집 블로그 (bucketplace.co.kr)