Initial Project

This commit is contained in:
2024-06-08 12:00:18 +12:00
commit f1c4b2183d
6 changed files with 256 additions and 0 deletions

0
.dockerignore Normal file
View File

1
.gitignore vendored Normal file
View File

@@ -0,0 +1 @@
.vscode

25
Dockerfile Normal file
View File

@@ -0,0 +1,25 @@
# syntax=docker/dockerfile:1

# Tag of the official nginx image to build from. Pinned so that rebuilds are
# reproducible; bump it deliberately, or override at build time with
#   docker build --build-arg NGINX_TAG=<tag> .
ARG NGINX_TAG=1.27

# ---------------------------------------------------------------------------
# Stage "build": official nginx image plus this project's configuration,
# entrypoint script and cache directory.
# ---------------------------------------------------------------------------
FROM nginx:${NGINX_TAG} AS build

# nginx.conf still contains the %%PYPI_HOSTNAME%% / %%PYPI_FILES_HOSTNAME%%
# placeholders; the entrypoint script substitutes them at container start.
COPY conf/nginx.conf /etc/nginx/nginx.conf

# Plain local file, so COPY (not ADD). An octal mode is used because it is
# accepted by every BuildKit frontend version.
COPY --chmod=0755 entrypoint/docker-entrypoint.sh /docker-entrypoint.sh

# Create the folder structure for the cache and hand it to the user the
# nginx workers run as (see "user www-data;" in conf/nginx.conf).
RUN mkdir -p /var/lib/nginx/pypi && \
    chown -R www-data:www-data /var/lib/nginx

# ---------------------------------------------------------------------------
# Stage "final": the complete filesystem of the build stage copied into an
# empty image as a single layer. Image metadata is not carried over from the
# build stage by this copy, so it is declared explicitly below.
# ---------------------------------------------------------------------------
FROM scratch AS final
COPY --from=build / /

# Where the cache data is stored in case you want to persist ;)
# Declared after the directory was created and chown'ed in the build stage.
VOLUME [ "/var/lib/nginx/pypi" ]

# Nginx is listening on port 80
EXPOSE 80

ENV TZ=Pacific/Auckland

# The entrypoint renders nginx.conf and then exec's the CMD.
ENTRYPOINT [ "/docker-entrypoint.sh" ]

# SIGQUIT asks nginx for a graceful shutdown.
STOPSIGNAL SIGQUIT

CMD ["nginx", "-g", "daemon off;"]

44
README.md Normal file
View File

@@ -0,0 +1,44 @@
# PYPI Proxy
The contents of this repository will enable building of a proxy based on [NGINX](https://hub.docker.com/_/nginx).
## Why?
This allows NGINX to serve PyPI on a custom URL you define, for both searching and individual downloads. Once a package has been cached by NGINX, subsequent installations are substantially faster — for example, when CI/CD builds run multiple times a day and pull the same files.
## Usage
To use this proxy, see the Compose file for Docker Swarm below.
```yaml
# docker-compose.yml
version: "3.7"
networks:
  traefik-net:
    external: true
services:
  proxy:
    image: docker.io/guisea/pypi-proxy:latest
    environment:
      TZ: Pacific/Auckland
      PYPI_HOSTNAME: pypi.example.com
      PYPI_FILES_HOSTNAME: pypi-files.example.com
      NGINX_ENTRYPOINT_QUIET_LOGS: 1
    networks:
      - traefik-net
    deploy:
      mode: replicated
      replicas: 1
      placement:
        constraints: ["node.role=worker"]
      labels:
        - >
          traefik.http.routers.pypi.rule=Host(`pypi.example.com`)
          || Host(`pypi-files.example.com`)
        - traefik.http.routers.pypi.tls=true
        - traefik.http.services.pypi.loadbalancer.server.port=80
        - traefik.http.services.pypi.loadbalancer.server.scheme=http
        - traefik.http.routers.pypi.entrypoints=web,websecure
        - traefik.http.routers.pypi.tls.certresolver=le
        - traefik.enable=true
        - traefik.docker.network=traefik-net
```

144
conf/nginx.conf Normal file
View File

@@ -0,0 +1,144 @@
# ---- Main (global) context -------------------------------------------------

# Worker processes run as this unprivileged account; the cache directory is
# expected to be writable by it.
user www-data;

# Fixed number of worker processes.
worker_processes 4;

# Where the master process writes its PID.
pid /var/run/nginx.pid;

# Errors go to the container's stderr.
error_log /dev/stderr;

# ---- Connection processing -------------------------------------------------
events {
    # Upper bound on simultaneous connections per worker process.
    worker_connections 1024;
}
# HTTP context: two caching reverse-proxy virtual hosts, one in front of
# pypi.org (package index) and one in front of files.pythonhosted.org
# (package files). The %%PYPI_HOSTNAME%% and %%PYPI_FILES_HOSTNAME%% tokens
# are placeholders replaced by the container entrypoint before nginx starts.
http {
    include /etc/nginx/mime.types;

    # Combined-style access log format that also records the cache status.
    log_format 2 '$http_x_forwarded_for - $remote_user [$time_local] '
                 '"$request" $status $body_bytes_sent $upstream_cache_status "$http_referer" '
                 '"$http_user_agent"' ;

    # key="value" access log format; this is the one actually in use below.
    log_format kv 'site="$server_name" server="$host" dest_port="$server_port" dest_ip="$server_addr" '
                  'src="$remote_addr" src_ip="$realip_remote_addr" user="$remote_user" '
                  'time_local="$time_local" protocol="$server_protocol" status="$status" '
                  'cache_status="$upstream_cache_status" bytes_out="$bytes_sent" bytes_in="$upstream_bytes_received" '
                  'http_referer="$http_referer" http_user_agent="$http_user_agent" '
                  'nginx_version="$nginx_version" http_x_forwarded_for="$http_x_forwarded_for" '
                  'http_x_header="$http_x_header" uri_query="$query_string" uri_path="$uri" '
                  'http_method="$request_method" response_time="$upstream_response_time" '
                  'cookie="$http_cookie" request_time="$request_time" category="$sent_http_content_type" https="$https"';

    access_log /dev/stdout kv;

    default_type application/octet-stream;

    sendfile on;
    tcp_nodelay on;
    tcp_nopush off;

    reset_timedout_connection on;

    server_tokens off;

    # Scheme that clients use to reach this proxy. nginx itself only listens
    # on plain HTTP (port 80), so $scheme is always "http" here; when a
    # TLS-terminating proxy in front forwards "X-Forwarded-Proto: https",
    # links rewritten by sub_filter must use https as well. Without that
    # header the value falls back to $scheme.
    map $http_x_forwarded_proto $public_scheme {
        default $scheme;
        https   https;
    }

    # Cache 100G worth of packages for up to 1 month
    # (nginx time suffixes are case-sensitive: "M" is months, "m" is minutes).
    proxy_cache_path /var/lib/nginx/pypi levels=1:2 keys_zone=pypi:16m inactive=1M max_size=100G;

    # Multiple server definitions makes nginx retry on errors
    upstream pypi {
        server pypi.org:443;
        server pypi.org:443;
        keepalive 16;
    }

    # Multiple server definitions makes nginx retry on errors
    upstream pypi-files {
        server files.pythonhosted.org:443;
        server files.pythonhosted.org:443;
        keepalive 16;
    }

    gzip on;
    gzip_types application/json text/css text/javascript;
    gzip_proxied any;
    gzip_vary on;

    # ---- Package index: proxies pypi.org ---------------------------------
    server {
        listen 80 default_server;
        server_name %%PYPI_HOSTNAME%%;
        root /var/www;

        proxy_cache pypi;
        # Cache entries are keyed on the normalized URI only.
        proxy_cache_key $uri;
        proxy_cache_lock on;
        proxy_cache_use_stale error timeout updating http_500 http_502 http_503 http_504;

        # HTTP/1.1 with an empty Connection header lets the upstream
        # "keepalive" connections be reused.
        proxy_http_version 1.1;
        proxy_set_header Host pypi.org;
        proxy_set_header Connection "";
        # Ask the upstream for uncompressed bodies so sub_filter can rewrite them.
        proxy_set_header Accept-Encoding "";
        proxy_ssl_name pypi.org;
        proxy_ssl_server_name on;

        # Rewrite any http redirects to use relative to proxy.
        # Anchored, with the dot escaped, so only redirects whose target
        # really is pypi.org are rewritten.
        proxy_redirect ~^https?://pypi\.org(.*)$ $1;

        location / {
            # Replace any reference to actual pypi w/ caching proxy
            sub_filter 'https://pypi.org' $public_scheme://$host;
            sub_filter 'https://files.pythonhosted.org' $public_scheme://%%PYPI_FILES_HOSTNAME%%;
            sub_filter_once off;
            sub_filter_types '*';

            proxy_pass https://pypi;
            # Everything outside /simple is passed through uncached.
            proxy_cache off;
        }

        location ^~ /simple {
            # Replace any reference to actual pypi w/ caching proxy
            sub_filter 'https://pypi.org' $public_scheme://$host;
            sub_filter 'https://files.pythonhosted.org' $public_scheme://%%PYPI_FILES_HOSTNAME%%;
            sub_filter_types '*';
            sub_filter_once off;

            # Make sure URI ends with /
            rewrite ^(.*[^/])$ $1/ break;

            # Expose the cache result (HIT, MISS, ...) to the client.
            add_header X-Cache2 $upstream_cache_status;

            # Index pages are cached for 5 minutes.
            proxy_cache_valid any 5m;
            proxy_pass https://pypi;
        }
    }

    # ---- Package files: proxies files.pythonhosted.org -------------------
    server {
        listen 80;
        server_name %%PYPI_FILES_HOSTNAME%%;
        root /var/www;

        proxy_cache pypi;
        # Cache entries are keyed on the normalized URI only.
        proxy_cache_key $uri;
        proxy_cache_lock on;
        proxy_cache_use_stale error timeout updating http_500 http_502 http_503 http_504;

        proxy_http_version 1.1;
        proxy_set_header Host files.pythonhosted.org;
        proxy_set_header Connection "";
        proxy_set_header Accept-Encoding "";
        proxy_ssl_name files.pythonhosted.org;
        proxy_ssl_server_name on;

        # Rewrite any http redirects to use relative to proxy.
        # Anchored, with the dots escaped, so only redirects whose target
        # really is files.pythonhosted.org are rewritten.
        proxy_redirect ~^https?://files\.pythonhosted\.org(.*)$ $1;

        location / {
            # Replace any reference to actual pypi w/ caching proxy
            sub_filter 'https://files.pythonhosted.org' $public_scheme://%%PYPI_FILES_HOSTNAME%%;
            sub_filter_once off;
            sub_filter_types '*';

            proxy_pass https://pypi-files;
            # Everything outside /packages is passed through uncached.
            proxy_cache off;
        }

        location ^~ /packages {
            # Expose the cache result (HIT, MISS, ...) to the client.
            add_header X-Cache2 $upstream_cache_status;

            # Package files are cached for 1 month ("M" = months).
            proxy_cache_valid any 1M;
            proxy_pass https://pypi-files;
        }
    }
}

View File

@@ -0,0 +1,42 @@
#!/bin/sh
# vim:sw=4:ts=4:et
#
# Container entrypoint.
#
#   1. Replaces the %%PYPI_HOSTNAME%% and %%PYPI_FILES_HOSTNAME%% placeholders
#      in /etc/nginx/nginx.conf with the values of the environment variables
#      of the same name.
#   2. When the command is nginx (or nginx-debug), runs every executable
#      *.sh file found in /docker-entrypoint.d/ in version-sorted order.
#   3. exec's the command it was given, so that command replaces this shell.
#
# Environment:
#   PYPI_HOSTNAME                host name substituted for %%PYPI_HOSTNAME%%
#   PYPI_FILES_HOSTNAME          host name substituted for %%PYPI_FILES_HOSTNAME%%
#   NGINX_ENTRYPOINT_QUIET_LOGS  if non-empty, silence this script's own messages

set -e

# File descriptor 3 carries this script's informational messages: it points
# at stdout normally and at /dev/null in quiet mode.
if [ -z "${NGINX_ENTRYPOINT_QUIET_LOGS:-}" ]; then
    exec 3>&1
else
    exec 3>/dev/null
fi

# Two separate tests joined with || instead of the obsolescent "-o" operator
# of test(1).
launching_nginx=false
if [ "${1:-}" = "nginx" ] || [ "${1:-}" = "nginx-debug" ]; then
    launching_nginx=true
fi

# An empty host name would render an argument-less "server_name ;" directive,
# which nginx rejects at start-up. Fail early with a clear message instead;
# other commands (e.g. a debugging shell) are not held to this requirement.
if [ "$launching_nginx" = true ]; then
    if [ -z "${PYPI_HOSTNAME:-}" ] || [ -z "${PYPI_FILES_HOSTNAME:-}" ]; then
        echo >&2 "$0: PYPI_HOSTNAME and PYPI_FILES_HOSTNAME must both be set"
        exit 1
    fi
fi

# Render the configuration in place.
sed -i -e "s|%%PYPI_FILES_HOSTNAME%%|${PYPI_FILES_HOSTNAME:-}|g" \
       -e "s|%%PYPI_HOSTNAME%%|${PYPI_HOSTNAME:-}|g" \
    /etc/nginx/nginx.conf

if [ "$launching_nginx" = true ]; then
    # "find ... -print -quit | read" succeeds only if at least one regular
    # file exists directly inside /docker-entrypoint.d/.
    if /usr/bin/find "/docker-entrypoint.d/" -mindepth 1 -maxdepth 1 -type f -print -quit 2>/dev/null | read -r v; then
        echo >&3 "$0: /docker-entrypoint.d/ is not empty, will attempt to perform configuration"

        echo >&3 "$0: Looking for shell scripts in /docker-entrypoint.d/"
        find "/docker-entrypoint.d/" -follow -type f -print | sort -V | while read -r f; do
            case "$f" in
                *.sh)
                    if [ -x "$f" ]; then
                        echo >&3 "$0: Launching $f";
                        "$f"
                    else
                        # warn on shell scripts without exec bit
                        echo >&3 "$0: Ignoring $f, not executable";
                    fi
                    ;;
                *) echo >&3 "$0: Ignoring $f";;
            esac
        done

        echo >&3 "$0: Configuration complete; ready for start up"
    else
        echo >&3 "$0: No files found in /docker-entrypoint.d/, skipping configuration"
    fi
fi

# Replace this shell with the requested command so it receives signals
# directly.
exec "$@"