From f1c4b2183d6aac32b085368f0f0d6d24c9451409 Mon Sep 17 00:00:00 2001
From: Aaron Guise
Date: Sat, 8 Jun 2024 12:00:18 +1200
Subject: [PATCH] Initial Project

Add an NGINX-based caching proxy for PyPI.

- Dockerfile: builds on a pinned official nginx image (nginx:1.27.0 so
  rebuilds are reproducible), installs the proxy configuration and the
  entrypoint script, then flattens the result into a single-layer image.
- conf/nginx.conf: proxies pypi.org and files.pythonhosted.org, caching
  /simple responses for 5 minutes and /packages responses for 1 month.
- entrypoint/docker-entrypoint.sh: substitutes PYPI_HOSTNAME and
  PYPI_FILES_HOSTNAME into nginx.conf before starting nginx.
- README.md: explains the purpose and shows a Docker Swarm compose file.

---
 .dockerignore                   |   0
 .gitignore                      |   1 +
 Dockerfile                      |  25 ++++++
 README.md                       |  44 ++++++++++
 conf/nginx.conf                 | 144 ++++++++++++++++++++++++++++++++
 entrypoint/docker-entrypoint.sh |  42 ++++++++++
 6 files changed, 256 insertions(+)
 create mode 100644 .dockerignore
 create mode 100644 .gitignore
 create mode 100644 Dockerfile
 create mode 100644 README.md
 create mode 100644 conf/nginx.conf
 create mode 100644 entrypoint/docker-entrypoint.sh

diff --git a/.dockerignore b/.dockerignore
new file mode 100644
index 0000000..e69de29
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..722d5e7
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+.vscode
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..2f2b072
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,25 @@
+FROM nginx:1.27.0 AS build
+
+COPY conf/nginx.conf /etc/nginx/nginx.conf
+COPY --chmod=755 entrypoint/docker-entrypoint.sh /docker-entrypoint.sh
+
+# Create the cache directory (proxy_cache_path in nginx.conf), owned by the nginx worker user
+RUN mkdir -p /var/lib/nginx/pypi && \
+    chown -R www-data:www-data /var/lib/nginx
+
+FROM scratch AS final
+# Flatten the build stage into one layer; image metadata must be re-declared below
+COPY --from=build / /
+
+# Cache storage; mount a volume here to persist cached packages ;)
+VOLUME [ "/var/lib/nginx/pypi" ]
+# Nginx is listening on port 80
+EXPOSE 80
+
+ENV TZ=Pacific/Auckland
+
+ENTRYPOINT [ "/docker-entrypoint.sh" ]
+# nginx performs a graceful shutdown on SIGQUIT
+STOPSIGNAL SIGQUIT
+
+CMD ["nginx", "-g", "daemon off;"]
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..35af2ad
--- /dev/null
+++ b/README.md
@@ -0,0 +1,44 @@
+# PYPI Proxy
+
+The contents of this repository will enable building of a proxy based on [NGINX](https://hub.docker.com/_/nginx).
+
+## Why?
+
+This allows NGINX to serve pypi on a custom URL you define for both searching and individual downloads. Once cached by NGINX this will substantially speed up installations. For example, when you are running CI/CD builds multiple times a day that pull the same files.
+
+## Usage
+
+To use this proxy, see the Compose file for Docker Swarm below.
+
+    # docker-compose.yml
+    version: "3.7"
+
+    networks:
+      traefik-net:
+        external: true
+    services:
+      proxy:
+        image: docker.io/guisea/pypi-proxy:latest
+        environment:
+          TZ: Pacific/Auckland
+          PYPI_HOSTNAME: pypi.example.com
+          PYPI_FILES_HOSTNAME: pypi-files.example.com
+          NGINX_ENTRYPOINT_QUIET_LOGS: 1
+        networks:
+          - traefik-net
+        deploy:
+          mode: replicated
+          replicas: 1
+          placement:
+            constraints: ["node.role=worker"]
+          labels:
+            - >
+              traefik.http.routers.pypi.rule=Host(`pypi.example.com`)
+              || Host(`pypi-files.example.com`)
+            - traefik.http.routers.pypi.tls=true
+            - traefik.http.services.pypi.loadbalancer.server.port=80
+            - traefik.http.services.pypi.loadbalancer.server.scheme=http
+            - traefik.http.routers.pypi.entrypoints=web,websecure
+            - traefik.http.routers.pypi.tls.certresolver=le
+            - traefik.enable=true
+            - traefik.docker.network=traefik-net
diff --git a/conf/nginx.conf b/conf/nginx.conf
new file mode 100644
index 0000000..0fcf585
--- /dev/null
+++ b/conf/nginx.conf
@@ -0,0 +1,144 @@
+user www-data;
+worker_processes 4;
+
+error_log /dev/stderr;
+pid /var/run/nginx.pid;
+
+
+events {
+    worker_connections 1024;
+}
+
+http {
+    include /etc/nginx/mime.types;
+    log_format 2 '$http_x_forwarded_for - $remote_user [$time_local] '
+                 '"$request" $status $body_bytes_sent $upstream_cache_status "$http_referer" '
+                 '"$http_user_agent"' ;
+
+    log_format kv 'site="$server_name" server="$host" dest_port="$server_port" dest_ip="$server_addr" '
+                  'src="$remote_addr" src_ip="$realip_remote_addr" user="$remote_user" '
+                  'time_local="$time_local" protocol="$server_protocol" status="$status" '
+                  'cache_status="$upstream_cache_status" bytes_out="$bytes_sent" bytes_in="$upstream_bytes_received" '
+                  'http_referer="$http_referer" http_user_agent="$http_user_agent" '
+                  'nginx_version="$nginx_version" http_x_forwarded_for="$http_x_forwarded_for" '
+                  'http_x_header="$http_x_header" uri_query="$query_string" uri_path="$uri" '
+                  'http_method="$request_method" response_time="$upstream_response_time" '
+                  'cookie="$http_cookie" request_time="$request_time" category="$sent_http_content_type" https="$https"';
+    access_log /dev/stdout kv;
+
+    default_type application/octet-stream;
+
+    sendfile on;
+    tcp_nodelay on;
+    tcp_nopush off;
+
+    reset_timedout_connection on;
+
+    server_tokens off;
+
+    # Cache 100G worth of packages for up to 1 month
+    proxy_cache_path /var/lib/nginx/pypi levels=1:2 keys_zone=pypi:16m inactive=1M max_size=100G;
+
+    # Multiple server definitions makes nginx retry on errors
+    upstream pypi {
+        server pypi.org:443;
+        server pypi.org:443;
+        keepalive 16;
+    }
+
+    # Multiple server definitions makes nginx retry on errors
+    upstream pypi-files {
+        server files.pythonhosted.org:443;
+        server files.pythonhosted.org:443;
+        keepalive 16;
+    }
+
+    gzip on;
+    gzip_types application/json text/css text/javascript;
+    gzip_proxied any;
+    gzip_vary on;
+
+    server {
+        listen 80 default_server;
+        server_name %%PYPI_HOSTNAME%%;
+
+        root /var/www;
+
+        proxy_cache pypi;
+        proxy_cache_key $uri;
+        proxy_cache_lock on;
+        proxy_cache_use_stale error timeout updating http_500 http_502 http_503 http_504;
+
+        proxy_http_version 1.1;
+        proxy_set_header Host pypi.org;
+        proxy_set_header Connection "";
+        proxy_set_header Accept-Encoding "";
+        proxy_ssl_name pypi.org;
+        proxy_ssl_server_name on;
+
+        # Rewrite any http redirects to use relative to proxy
+        proxy_redirect ~https?://pypi.org(.*) $1;
+
+        location / {
+            # Replace any reference to actual pypi w/ caching proxy
+            sub_filter 'https://pypi.org' $scheme://$host;
+            sub_filter 'https://files.pythonhosted.org' $scheme://%%PYPI_FILES_HOSTNAME%%;
+            sub_filter_once off;
+            sub_filter_types '*';
+            proxy_pass https://pypi;
+            proxy_cache off;
+        }
+
+        location ^~ /simple {
+            sub_filter 'https://pypi.org' $scheme://$host;
+            sub_filter 'https://files.pythonhosted.org' $scheme://%%PYPI_FILES_HOSTNAME%%;
+            sub_filter_types '*';
+            sub_filter_once off;
+            # Make sure URI ends with /
+            rewrite ^(.*[^/])$ $1/ break;
+
+            add_header X-Cache2 $upstream_cache_status;
+
+            proxy_cache_valid any 5m;
+
+            proxy_pass https://pypi;
+        }
+    }
+
+    server {
+        listen 80;
+        server_name %%PYPI_FILES_HOSTNAME%%;
+
+        root /var/www;
+
+        proxy_cache pypi;
+        proxy_cache_key $uri;
+        proxy_cache_lock on;
+        proxy_cache_use_stale error timeout updating http_500 http_502 http_503 http_504;
+
+        proxy_http_version 1.1;
+        proxy_set_header Host files.pythonhosted.org;
+        proxy_set_header Connection "";
+        proxy_set_header Accept-Encoding "";
+        proxy_ssl_name files.pythonhosted.org;
+        proxy_ssl_server_name on;
+
+        # Rewrite any http redirects to use relative to proxy
+        proxy_redirect ~https?://files.pythonhosted.org(.*) $1;
+
+        location / {
+            # Replace any reference to actual pypi w/ caching proxy
+            sub_filter 'https://files.pythonhosted.org' $scheme://%%PYPI_FILES_HOSTNAME%%;
+            sub_filter_once off;
+            sub_filter_types '*';
+            proxy_pass https://pypi-files;
+            proxy_cache off;
+        }
+
+        location ^~ /packages {
+            add_header X-Cache2 $upstream_cache_status;
+            proxy_cache_valid any 1M;
+            proxy_pass https://pypi-files;
+        }
+    }
+}
diff --git a/entrypoint/docker-entrypoint.sh b/entrypoint/docker-entrypoint.sh
new file mode 100644
index 0000000..0f03b73
--- /dev/null
+++ b/entrypoint/docker-entrypoint.sh
@@ -0,0 +1,42 @@
+#!/bin/sh
+# vim:sw=4:ts=4:et
+
+set -e
+
+if [ -z "${NGINX_ENTRYPOINT_QUIET_LOGS:-}" ]; then
+    exec 3>&1
+else
+    exec 3>/dev/null
+fi
+
+sed -i -e "s|%%PYPI_FILES_HOSTNAME%%|${PYPI_FILES_HOSTNAME}|g" \
+    -e "s|%%PYPI_HOSTNAME%%|${PYPI_HOSTNAME}|g" \
+    /etc/nginx/nginx.conf
+
+if [ "$1" = "nginx" ] || [ "$1" = "nginx-debug" ]; then
+    if /usr/bin/find "/docker-entrypoint.d/" -mindepth 1 -maxdepth 1 -type f -print -quit 2>/dev/null | read v; then
+        echo >&3 "$0: /docker-entrypoint.d/ is not empty, will attempt to perform configuration"
+
+        echo >&3 "$0: Looking for shell scripts in /docker-entrypoint.d/"
+        find "/docker-entrypoint.d/" -follow -type f -print | sort -V | while read -r f; do
+            case "$f" in
+                *.sh)
+                    if [ -x "$f" ]; then
+                        echo >&3 "$0: Launching $f";
+                        "$f"
+                    else
+                        # warn on shell scripts without exec bit
+                        echo >&3 "$0: Ignoring $f, not executable";
+                    fi
+                    ;;
+                *) echo >&3 "$0: Ignoring $f";;
+            esac
+        done
+
+        echo >&3 "$0: Configuration complete; ready for start up"
+    else
+        echo >&3 "$0: No files found in /docker-entrypoint.d/, skipping configuration"
+    fi
+fi
+
+exec "$@"