diff --git a/.github/workflows/dev.yml b/.github/workflows/dev.yml
new file mode 100644
index 000000000..f2639eeb7
--- /dev/null
+++ b/.github/workflows/dev.yml
@@ -0,0 +1,29 @@
+name: Deploy Dev
+on:
+  workflow_dispatch:
+    branches:
+      - main
+jobs:
+  deploy-dev:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout repo
+        uses: actions/checkout@v3
+        with:
+          submodules: "true"
+      - name: Setup NodeJS 18
+        uses: actions/setup-node@v3
+        with:
+          node-version: 18
+      - name: Install Serverless Framework
+        run: npm install -g serverless
+      - name: Serverless AWS authentication
+        run: sls config credentials --provider aws --key ${{ secrets.AWS_KEY }} --secret ${{ secrets.AWS_SECRET }}
+      - name: Deploy Lambda functions
+        run: sls deploy
+      - name: Export Endpoint URL
+        run: echo $(sls info --verbose | grep endpoint | sed s/endpoint\:\ //g | awk '{print $1}') > endpoint
+      - name: Echo Endpoint URL
+        run: echo $(cat endpoint)
+      - name: Test Lambda functions
+        run: "curl -X POST -H 'Content-Type: application/json' -d @prompt.json $(cat endpoint)v1/completions"
diff --git a/.github/workflows/rm-dev.yml b/.github/workflows/rm-dev.yml
new file mode 100644
index 000000000..88137ef0c
--- /dev/null
+++ b/.github/workflows/rm-dev.yml
@@ -0,0 +1,18 @@
+name: Remove Dev
+on: workflow_dispatch
+
+jobs:
+  rm-dev:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v3
+      - name: Setup NodeJS 18
+        uses: actions/setup-node@v3
+        with:
+          node-version: 18
+      - name: Install Serverless Framework
+        run: npm install -g serverless
+      - name: Serverless AWS authentication
+        run: sls config credentials --provider aws --key ${{ secrets.AWS_KEY }} --secret ${{ secrets.AWS_SECRET }}
+      - name: Remove Lambda functions
+        run: sls remove
diff --git a/Dockerfile.aws b/Dockerfile.aws
new file mode 100644
index 000000000..2f1983c07
--- /dev/null
+++ b/Dockerfile.aws
@@ -0,0 +1,66 @@
+# REF: https://aws.amazon.com/blogs/aws/new-for-aws-lambda-container-image-support/
+# The download size of `python:3.10-slim-bullseye` is **45MB**¹. Its uncompressed on-disk size is **125MB**¹.
+# (1) The best Docker base image for your Python application (March 2023). https://pythonspeed.com/articles/base-image-python-docker-images/.
+# (2) Reduce the size of container images with DockerSlim. https://developers.redhat.com/articles/2022/01/17/reduce-size-container-images-dockerslim.
+# Define global args
+ARG FUNCTION_DIR="/home/app/"
+ARG RUNTIME_VERSION="3.10"
+
+# Stage 1 - bundle base image + runtime
+# Grab a fresh copy of the image and install GCC
+FROM python:${RUNTIME_VERSION}-slim-bullseye AS python-slim-bullseye
+
+# Stage 2 - build function and dependencies
+FROM python-slim-bullseye AS build-image
+# Install aws-lambda-cpp build dependencies
+# REF: https://docs.aws.amazon.com/lambda/latest/dg/images-create.html
+RUN apt-get update && \
+    apt-get install -y \
+    libopenblas-dev \
+    ninja-build \
+    build-essential \
+    pkg-config \
+    curl
+
+# Include global args in this stage of the build
+ARG FUNCTION_DIR
+ARG RUNTIME_VERSION
+# Create function directory
+RUN mkdir -p ${FUNCTION_DIR}
+# Install the function's dependencies
+COPY requirements.txt ./
+RUN python${RUNTIME_VERSION} -m pip install -r requirements.txt --target ${FUNCTION_DIR}
+
+COPY ./ ${FUNCTION_DIR}
+
+RUN python${RUNTIME_VERSION} -m pip install --upgrade pip pytest cmake scikit-build setuptools fastapi uvicorn sse-starlette
+# REF: https://github.com/abetlen/llama-cpp-python/blob/main/Dockerfile
+RUN cd ${FUNCTION_DIR} && CMAKE_ARGS="-DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS" FORCE_CMAKE=1 python${RUNTIME_VERSION} setup.py develop
+# Install Lambda Runtime Interface Client for Python
+RUN python${RUNTIME_VERSION} -m pip install awslambdaric --target ${FUNCTION_DIR}
+
+# download the model file
+RUN mkdir ${FUNCTION_DIR}/model
+RUN curl -L https://huggingface.co/TheBloke/orca_mini_v3_7B-GGML/resolve/main/orca_mini_v3_7b.ggmlv3.q4_0.bin -o ${FUNCTION_DIR}/model/ggml-q4_0.bin
+
+# Stage 3 - final runtime image
+# Grab a fresh copy of the Python image
+FROM python-slim-bullseye
+
+# Install runtime dependencies
+RUN apt-get update && \
+    apt-get install -y \
+    libopenblas-dev
+
+# Include global arg in this stage of the build
+ARG FUNCTION_DIR
+# Set working directory to function root directory
+WORKDIR ${FUNCTION_DIR}
+# Copy in the built dependencies
+COPY --from=build-image ${FUNCTION_DIR} ${FUNCTION_DIR}
+# (Optional) Add Lambda Runtime Interface Emulator and use a script in the ENTRYPOINT for simpler local runs
+ADD https://github.com/aws/aws-lambda-runtime-interface-emulator/releases/latest/download/aws-lambda-rie /usr/bin/aws-lambda-rie
+COPY entry.sh /
+RUN chmod 755 /usr/bin/aws-lambda-rie /entry.sh
+ENTRYPOINT [ "/entry.sh" ]
+CMD [ "llama_cpp.server.aws.handler" ]
diff --git a/entry.sh b/entry.sh
new file mode 100644
index 000000000..a608361e2
--- /dev/null
+++ b/entry.sh
@@ -0,0 +1,6 @@
+#!/bin/sh
+if [ -z "${AWS_LAMBDA_RUNTIME_API}" ]; then
+    exec /usr/bin/aws-lambda-rie /usr/local/bin/python -m awslambdaric $1
+else
+    exec /usr/local/bin/python -m awslambdaric $1
+fi
diff --git a/llama_cpp/server/aws.py b/llama_cpp/server/aws.py
new file mode 100644
index 000000000..46629480c
--- /dev/null
+++ b/llama_cpp/server/aws.py
@@ -0,0 +1,9 @@
+"""AWS Lambda function for llama.cpp.
+""" +from mangum import Mangum +from llama_cpp.server.app import create_app, Settings +import os + +print("os.cpu_count()", os.cpu_count()) +handler = Mangum(create_app( + Settings(n_threads=os.cpu_count(), embedding=False))) diff --git a/prompt.json b/prompt.json new file mode 100644 index 000000000..c08f9f7c4 --- /dev/null +++ b/prompt.json @@ -0,0 +1,6 @@ +{ + "prompt": [ + "\n\n### Instructions:\nWhat is the capital of France?\n\n### Response:\n" + ], + "stop": ["\n", "###"] +} diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 000000000..e2b91c07c --- /dev/null +++ b/requirements.txt @@ -0,0 +1,7 @@ +typing_extensions +fastapi +sse-starlette +mangum +numpy +diskcache +pydantic_settings \ No newline at end of file diff --git a/serverless.yml b/serverless.yml new file mode 100644 index 000000000..d370c0cb1 --- /dev/null +++ b/serverless.yml @@ -0,0 +1,43 @@ +service: llama-cpp + +frameworkVersion: "3" + +provider: + name: aws + deploymentMethod: direct + # REF: https://www.serverless.com/blog/container-support-for-lambda + ecr: + # In this section you can define images that will be built locally and uploaded to ECR + images: + appimage: + path: ./ + file: Dockerfile.aws + stage: dev + region: ap-southeast-1 + iam: + role: + statements: + - Effect: "Allow" + Action: + - "lambda:InvokeFunction" + Resource: "*" + +functions: + chat: + image: + name: appimage + memorySize: 10240 + environment: + MODEL: ./model/ggml-q4_0.bin + timeout: + 900 + # https://www.serverless.com/framework/docs/providers/aws/guide/functions#lambda-function-urls + url: + # Allow CORS for all requests from any origin + cors: + allowedOrigins: + - "*" + #- https://url1.com + #- https://url2.com + allowedMethods: + - POST diff --git a/vendor/llama.cpp b/vendor/llama.cpp index f5bfea058..edcc7ae7d 160000 --- a/vendor/llama.cpp +++ b/vendor/llama.cpp @@ -1 +1 @@ -Subproject commit f5bfea0580e417f99850d5456ca541d871a3e48c +Subproject commit edcc7ae7d26007bbf83136e9d33f863fcad9b871