From 01fe142850550b2657805f77abbe0d5ee76d2c03 Mon Sep 17 00:00:00 2001 From: Oscar Krause Date: Tue, 3 Jan 2023 15:22:49 +0100 Subject: [PATCH 01/11] .gitlab-ci.yml - fixed release job --- .gitlab-ci.yml | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 0a0d788..6f0e7a8 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -267,20 +267,31 @@ deploy:pacman: - 'echo "EXPORT_NAME: ${EXPORT_NAME}"' - 'curl --header "JOB-TOKEN: $CI_JOB_TOKEN" --upload-file ${EXPORT_NAME} "${CI_API_V4_URL}/projects/${CI_PROJECT_ID}/packages/generic/${PACKAGE_NAME}/${PACKAGE_VERSION}/${EXPORT_NAME}"' -release: - image: registry.gitlab.com/gitlab-org/release-cli:latest - stage: .post +release:prepare: + stage: .pre rules: - if: $CI_COMMIT_TAG when: never - if: $CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH - before_script: - - set -a # make variables from "source" command available to release-cli + script: - source version.env + - echo &VERSION + artifacts: + reports: + dotenv: version.env + +release: + image: registry.gitlab.com/gitlab-org/release-cli:latest + stage: .post + needs: + - job: release:prepare + artifacts: true + rules: + - if: $CI_COMMIT_TAG + when: never + - if: $CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH script: - echo "Running release-job for $VERSION" - after_script: - - set +a release: name: $CI_PROJECT_TITLE $version description: Release of $CI_PROJECT_TITLE version $VERSION From d6cc6dcbee2773997160dae602de596e96fa5af9 Mon Sep 17 00:00:00 2001 From: Oscar Krause Date: Tue, 3 Jan 2023 17:38:32 +0100 Subject: [PATCH 02/11] fixes --- app/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/main.py b/app/main.py index 303f81d..6012e4d 100644 --- a/app/main.py +++ b/app/main.py @@ -491,7 +491,7 @@ async def leasing_v1_lessor_lease_remove(request: Request): @app.post('/leasing/v1/lessor/shutdown', description='shutdown all leases') async def leasing_v1_lessor_shutdown(request: 
Request): - j, cur_time = json.loads((await request.body()).decode('utf-8')) + j, cur_time = json.loads((await request.body()).decode('utf-8')), datetime.utcnow() token = j.get('token') token = jwt.decode(token=token, key=jwt_decode_key, algorithms=ALGORITHMS.RS256, options={'verify_aud': False}) From f1369d5e258324ebb920ff7b6c1fae2d96321e20 Mon Sep 17 00:00:00 2001 From: Oscar Krause Date: Tue, 3 Jan 2023 17:38:45 +0100 Subject: [PATCH 03/11] added some docs --- doc/Reverse Engineering Notes.md | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/Reverse Engineering Notes.md b/doc/Reverse Engineering Notes.md index 920a2c3..0233a64 100644 --- a/doc/Reverse Engineering Notes.md +++ b/doc/Reverse Engineering Notes.md @@ -34,6 +34,7 @@ nvidia-gridd[2986]: License acquired successfully. (Info: license.nvidia.space, Most variables and configs are stored in `/var/lib/docker/volumes/configurations/_data`. Files can be modified with `docker cp :/venv/... /opt/localfile/...` and back. +(You may need to fix permissions with `docker exec -u 0 chown nonroot:nonroot /venv/...`) ## Dive / Docker image inspector From 5575fee382cfea6b85ee15c429f36ed681904d08 Mon Sep 17 00:00:00 2001 From: Oscar Krause Date: Tue, 3 Jan 2023 17:39:10 +0100 Subject: [PATCH 04/11] fixed config test --- test/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/main.py b/test/main.py index f04de99..48a99bf 100644 --- a/test/main.py +++ b/test/main.py @@ -50,7 +50,7 @@ def test_health(): def test_config(): - response = client.get('/-/') + response = client.get('/-/config') assert response.status_code == 200 From ac811d5df79171ba63fa9a82b818ca4ace5c55a0 Mon Sep 17 00:00:00 2001 From: Oscar Krause Date: Tue, 3 Jan 2023 18:04:21 +0100 Subject: [PATCH 05/11] added 'LEASE_EXPIRE_HOURS' variable for better debugging --- app/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/main.py b/app/main.py index 6012e4d..7569522 100644 --- a/app/main.py +++ 
b/app/main.py @@ -41,7 +41,7 @@ ALLOTMENT_REF = str(env('ALLOTMENT_REF', '20000000-0000-0000-0000-000000000001') INSTANCE_KEY_RSA = load_key(str(env('INSTANCE_KEY_RSA', join(dirname(__file__), 'cert/instance.private.pem')))) INSTANCE_KEY_PUB = load_key(str(env('INSTANCE_KEY_PUB', join(dirname(__file__), 'cert/instance.public.pem')))) TOKEN_EXPIRE_DELTA = relativedelta(hours=1) # days=1 -LEASE_EXPIRE_DELTA = relativedelta(days=int(env('LEASE_EXPIRE_DAYS', 90))) +LEASE_EXPIRE_DELTA = relativedelta(days=int(env('LEASE_EXPIRE_DAYS', 90)), hours=int(env('LEASE_EXPIRE_HOURS', 0))) LEASE_RENEWAL_PERIOD = float(env('LEASE_RENEWAL_PERIOD', 0.15)) CORS_ORIGINS = str(env('CORS_ORIGINS', '')).split(',') if (env('CORS_ORIGINS')) else [f'https://{DLS_URL}'] From 7b2428ea386207628a14c6821c6f939544f41858 Mon Sep 17 00:00:00 2001 From: Oscar Krause Date: Tue, 3 Jan 2023 18:04:50 +0100 Subject: [PATCH 06/11] removed some debugging --- app/orm.py | 1 - 1 file changed, 1 deletion(-) diff --git a/app/orm.py b/app/orm.py index efc5853..aadc0ef 100644 --- a/app/orm.py +++ b/app/orm.py @@ -41,7 +41,6 @@ class Origin(Base): def create_or_update(engine: Engine, origin: "Origin"): session = sessionmaker(bind=engine)() entity = session.query(Origin).filter(Origin.origin_ref == origin.origin_ref).first() - print(entity) if entity is None: session.add(origin) else: From 7e6e523799067dbc666efc251380fa51c692ea35 Mon Sep 17 00:00:00 2001 From: Oscar Krause Date: Tue, 3 Jan 2023 18:05:12 +0100 Subject: [PATCH 07/11] improved test (checking uuid are 36 chars long) --- test/main.py | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/test/main.py b/test/main.py index 48a99bf..67856c9 100644 --- a/test/main.py +++ b/test/main.py @@ -184,7 +184,9 @@ def test_leasing_v1_lessor(): lease_result_list = response.json().get('lease_result_list') assert len(lease_result_list) == 1 + assert len(lease_result_list[0]['lease']['ref']) == 36 assert 
str(UUID(lease_result_list[0]['lease']['ref'])) == lease_result_list[0]['lease']['ref'] + return lease_result_list[0]['lease']['ref'] @@ -194,33 +196,38 @@ def test_leasing_v1_lessor_lease(): active_lease_list = response.json().get('active_lease_list') assert len(active_lease_list) == 1 + assert len(active_lease_list[0]) == 36 assert str(UUID(active_lease_list[0])) == active_lease_list[0] def test_leasing_v1_lease_renew(): response = client.get('/leasing/v1/lessor/leases', headers={'authorization': __bearer_token(ORIGIN_REF)}) active_lease_list = response.json().get('active_lease_list') - lease_ref = active_lease_list[0] + active_lease_ref = active_lease_list[0] ### - response = client.put(f'/leasing/v1/lease/{lease_ref}', headers={'authorization': __bearer_token(ORIGIN_REF)}) + response = client.put(f'/leasing/v1/lease/{active_lease_ref}', headers={'authorization': __bearer_token(ORIGIN_REF)}) assert response.status_code == 200 - assert response.json().get('lease_ref') == lease_ref + lease_ref = response.json().get('lease_ref') + assert len(lease_ref) == 36 + assert lease_ref == active_lease_ref def test_leasing_v1_lease_delete(): response = client.get('/leasing/v1/lessor/leases', headers={'authorization': __bearer_token(ORIGIN_REF)}) active_lease_list = response.json().get('active_lease_list') - lease_ref = active_lease_list[0] + active_lease_ref = active_lease_list[0] ### - response = client.delete(f'/leasing/v1/lease/{lease_ref}', headers={'authorization': __bearer_token(ORIGIN_REF)}) + response = client.delete(f'/leasing/v1/lease/{active_lease_ref}', headers={'authorization': __bearer_token(ORIGIN_REF)}) assert response.status_code == 200 - assert response.json().get('lease_ref') == lease_ref + lease_ref = response.json().get('lease_ref') + assert len(lease_ref) == 36 + assert lease_ref == active_lease_ref def test_leasing_v1_lessor_lease_remove(): @@ -231,4 +238,5 @@ def test_leasing_v1_lessor_lease_remove(): released_lease_list = 
response.json().get('released_lease_list') assert len(released_lease_list) == 1 + assert len(released_lease_list[0]) == 36 assert released_lease_list[0] == lease_ref From 41980212129c4bffcfce9297d5bc768c66b98b7a Mon Sep 17 00:00:00 2001 From: Oscar Krause Date: Tue, 3 Jan 2023 18:10:02 +0100 Subject: [PATCH 08/11] README.md - fixed windows issue with `/leasing/v1/lessor/shutdown` --- README.md | 25 ++++++++++- docker-compose.yml | 108 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 131 insertions(+), 2 deletions(-) create mode 100644 docker-compose.yml diff --git a/README.md b/README.md index e64effa..9642367 100644 --- a/README.md +++ b/README.md @@ -11,7 +11,8 @@ Only the clients need a connection to this service on configured port. ## ToDo's -- Support http mode for using external https proxy (disable uvicorn ssl for using behind proxy) +- Check why Windows guests display "can't acquire license" although no such message appears in the log and the license is + acquired successfully ## Endpoints @@ -102,6 +103,8 @@ docker run -e DLS_URL=`hostname -i` -e DLS_PORT=443 -p 443:443 -v $WORKING_DIR:/ **Docker-Compose / Deploy stack** +See [`docker-compose.yml`](docker-compose.yml) for a more advanced example. + ```yaml version: '3.9' @@ -439,7 +442,10 @@ Dec 20 17:53:34 ubuntu-grid-server nvidia-gridd[10354]: License acquired success -### Error on releasing leases on shutdown +### Error on releasing leases on shutdown (fixed in 1.3 by using reverse proxy) + +**UPDATE for version `1.3`**: This issue can be fixed by using a reverse proxy (e.g. `nginx`). Please read the section +below. The driver wants to release current leases on shutting down windows. This endpoint needs to be a http endpoint and is currently not implemented. 
The error message looks like and safely can be ignored (since we have no license @@ -452,6 +458,21 @@ limitation :P): <0>:End Logging ``` +#### log with 1.3 and nginx as reverse proxy + +``` +<1>:NLS initialized +<2>:NLS initialized +<1>:Valid GRID license not found. GPU features and performance will be fully degraded. To enable full functionality please configure licensing details. +<1>:License acquired successfully. (Info: 192.168.178.33, NVIDIA RTX Virtual Workstation; Expiry: 2023-1-4 16:48:20 GMT) +<2>:Valid GRID license not found. GPU features and performance will be fully degraded. To enable full functionality please configure licensing details. +<2>:License acquired successfully from local trusted store. (Info: 192.168.178.33, NVIDIA RTX Virtual Workstation; Expiry: 2023-1-4 16:48:20 GMT) +<2>:End Logging +<1>:End Logging +<0>:License returned successfully. (Info: 192.168.178.33) +<0>:End Logging +``` + # Credits Thanks to vGPU community and all who uses this project and report bugs. diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..2411067 --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,108 @@ +version: '3.9' + +x-dls-variables: &dls-variables + DLS_URL: localhost # REQUIRED + DLS_PORT: 443 # must match nginx listen port + LEASE_EXPIRE_DAYS: 90 + DATABASE: sqlite:////app/database/db.sqlite + DEBUG: false + +services: + web: + image: nginx + ports: + # these are the ports nginx (!) listens on + - "80:80" # for "/leasing/v1/lessor/shutdown" used by windows guests, can't be changed! 
+ - "443:443" # first part must match "DLS_PORT" + volumes: + - /opt/docker/fastapi-dls/cert:/opt/cert + healthcheck: + test: [ "CMD", "curl", "--insecure", "--fail", "https://localhost/-/health" ] + interval: 10s + timeout: 5s + retries: 3 + start_period: 30s + command: | + bash -c 'bash -s <<"EOF" + cat > /etc/nginx/nginx.conf <<"EON" + daemon off; + user root; + worker_processes auto; + + events { + worker_connections 1024; + } + + http { + gzip on; + gzip_disable "msie6"; + include /etc/nginx/mime.types; + + upstream dls-backend { + server dls:443; + } + + server { + listen 443 ssl http2 default_server; + listen [::]:443 ssl http2 default_server; + + root /var/www/html; + index index.html; + server_name _; + + ssl_certificate "/opt/cert/webserver.crt"; + ssl_certificate_key "/opt/cert/webserver.key"; + ssl_session_cache shared:SSL:1m; + ssl_session_timeout 10m; + ssl_protocols TLSv1.3 TLSv1.2; + # ssl_ciphers "ECDHE-ECDSA-CHACHA20-POLY1305"; + # ssl_ciphers PROFILE=SYSTEM; + ssl_prefer_server_ciphers on; + + location / { + proxy_ssl_verify off; + proxy_set_header Host $$http_host; + proxy_set_header X-Real-IP $$remote_addr; + proxy_set_header X-Forwarded-For $$proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $$scheme; + proxy_pass https://dls-backend$$request_uri; + } + } + + server { + listen 80; + listen [::]:80; + + root /var/www/html; + index index.html; + server_name _; + + location /leasing/v1/lessor/shutdown { + proxy_ssl_verify off; + proxy_set_header Host $$http_host; + proxy_set_header X-Real-IP $$remote_addr; + proxy_set_header X-Forwarded-For $$proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $$scheme; + proxy_pass https://dls-backend/leasing/v1/lessor/shutdown; + } + + location / { + return 301 https://dls-backend$$request_uri; + } + } + } + EON + cat /etc/nginx/nginx.conf + nginx + EOF' + dls: + image: collinwebdesigns/fastapi-dls:latest + restart: always + environment: + <<: *dls-variables + volumes: + - 
/opt/docker/fastapi-dls/cert:/app/cert + - db:/app/database + +volumes: + db: From 7e3f2d0345296179a367ca204a89e0a5c087381e Mon Sep 17 00:00:00 2001 From: Oscar Krause Date: Tue, 3 Jan 2023 18:44:30 +0100 Subject: [PATCH 09/11] docker-compose.yml - fixes --- docker-compose.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index 2411067..bb33609 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -87,14 +87,14 @@ services: } location / { - return 301 https://dls-backend$$request_uri; + return 301 https://$$host$$request_uri; } } } EON - cat /etc/nginx/nginx.conf nginx EOF' + dls: image: collinwebdesigns/fastapi-dls:latest restart: always From 5bb8437b1d615febc94cc119f4abef9dc7507908 Mon Sep 17 00:00:00 2001 From: Oscar Krause Date: Tue, 3 Jan 2023 18:59:34 +0100 Subject: [PATCH 10/11] README.md - added timestamp to linux token filename --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 9642367..4e96236 100644 --- a/README.md +++ b/README.md @@ -319,7 +319,7 @@ Successfully tested with this package versions: ## Linux ```shell -curl --insecure -L -X GET https:///client-token -o /etc/nvidia/ClientConfigToken/client_configuration_token.tok +curl --insecure -L -X GET https:///client-token -o /etc/nvidia/ClientConfigToken/client_configuration_token_$(date '+%d-%m-%Y-%H-%M-%S').tok service nvidia-gridd restart nvidia-smi -q | grep "License" ``` From 27f47b93b88f1fc88ef4896ac1f74b05638d3c75 Mon Sep 17 00:00:00 2001 From: Oscar Krause Date: Tue, 3 Jan 2023 20:45:16 +0100 Subject: [PATCH 11/11] docker-compose.yml - added experimental health endpoint --- docker-compose.yml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/docker-compose.yml b/docker-compose.yml index bb33609..77dcc07 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -67,6 +67,12 @@ services: proxy_set_header X-Forwarded-Proto $$scheme; proxy_pass 
https://dls-backend$$request_uri; } + + location = /-/health { + access_log off; + add_header 'Content-Type' 'application/json'; + return 200; # '{\"status\":\"up\",\"service\":\"nginx\"}'; + } } server {