diff --git a/.github/workflows/dev.yml b/.github/workflows/dev.yml
new file mode 100644
index 000000000..f2639eeb7
--- /dev/null
+++ b/.github/workflows/dev.yml
@@ -0,0 +1,29 @@
+name: Deploy Dev
+on:
+  workflow_dispatch:
+    branches:
+      - main
+jobs:
+  deploy-dev:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout repo
+        uses: actions/checkout@v3
+        with:
+          submodules: "true"
+      - name: Setup NodeJS 18
+        uses: actions/setup-node@v3
+        with:
+          node-version: 18
+      - name: Install Serverless Framework
+        run: npm install -g serverless
+      - name: Serverless AWS authentication
+        run: sls config credentials --provider aws --key ${{ secrets.AWS_KEY }} --secret ${{ secrets.AWS_SECRET }}
+      - name: Deploy Lambda functions
+        run: sls deploy
+      - name: Export Endpoint URL
+        run: echo $(sls info --verbose | grep endpoint | sed s/endpoint\:\ //g | awk '{print $1}') > endpoint
+      - name: Echo Endpoint URL
+        run: echo $(cat endpoint)
+      - name: Test Lambda functions
+        run: "curl -X POST -H 'Content-Type: application/json' -d @prompt.json $(cat endpoint)v1/completions"
diff --git a/.github/workflows/rm-dev.yml b/.github/workflows/rm-dev.yml
new file mode 100644
index 000000000..88137ef0c
--- /dev/null
+++ b/.github/workflows/rm-dev.yml
@@ -0,0 +1,18 @@
+name: Remove Dev
+on: workflow_dispatch
+
+jobs:
+  rm-dev:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v3
+      - name: Setup NodeJS 18
+        uses: actions/setup-node@v3
+        with:
+          node-version: 18
+      - name: Install Serverless Framework
+        run: npm install -g serverless
+      - name: Serverless AWS authentication
+        run: sls config credentials --provider aws --key ${{ secrets.AWS_KEY }} --secret ${{ secrets.AWS_SECRET }}
+      - name: Remove Lambda functions
+        run: sls remove
diff --git a/Dockerfile.aws b/Dockerfile.aws
new file mode 100644
index 000000000..2f1983c07
--- /dev/null
+++ b/Dockerfile.aws
@@ -0,0 +1,66 @@
+# REF: https://aws.amazon.com/blogs/aws/new-for-aws-lambda-container-image-support/
+# The download size of `python:3.10-slim-bullseye` is **45MB**¹. Its uncompressed on-disk size is **125MB**¹.
+# (1) The best Docker base image for your Python application (March 2023). https://pythonspeed.com/articles/base-image-python-docker-images/.
+# (2) Reduce the size of container images with DockerSlim. https://developers.redhat.com/articles/2022/01/17/reduce-size-container-images-dockerslim.
+# Define global args
+ARG FUNCTION_DIR="/home/app/"
+ARG RUNTIME_VERSION="3.10"
+
+# Stage 1 - bundle base image + runtime
+# Grab a fresh copy of the image and install GCC
+FROM python:${RUNTIME_VERSION}-slim-bullseye AS python-slim-bullseye
+
+# Stage 2 - build function and dependencies
+FROM python-slim-bullseye AS build-image
+# Install aws-lambda-cpp build dependencies
+# REF: https://docs.aws.amazon.com/lambda/latest/dg/images-create.html
+RUN apt-get update && \
+    apt-get install -y \
+    libopenblas-dev \
+    ninja-build \
+    build-essential \
+    pkg-config \
+    curl
+
+# Include global args in this stage of the build
+ARG FUNCTION_DIR
+ARG RUNTIME_VERSION
+# Create function directory
+RUN mkdir -p ${FUNCTION_DIR}
+# Install the function's dependencies
+COPY requirements.txt ./
+RUN python${RUNTIME_VERSION} -m pip install -r requirements.txt --target ${FUNCTION_DIR}
+
+COPY ./ ${FUNCTION_DIR}
+
+RUN python${RUNTIME_VERSION} -m pip install --upgrade pip pytest cmake scikit-build setuptools fastapi uvicorn sse-starlette
+# REF: https://github.com/abetlen/llama-cpp-python/blob/main/Dockerfile
+RUN cd ${FUNCTION_DIR} && CMAKE_ARGS="-DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS" FORCE_CMAKE=1 python${RUNTIME_VERSION} setup.py develop
+# Install Lambda Runtime Interface Client for Python
+RUN python${RUNTIME_VERSION} -m pip install awslambdaric --target ${FUNCTION_DIR}
+
+# download the model file
+RUN mkdir ${FUNCTION_DIR}/model
+RUN curl -L https://huggingface.co/TheBloke/orca_mini_v3_7B-GGML/resolve/main/orca_mini_v3_7b.ggmlv3.q4_0.bin -o ${FUNCTION_DIR}/model/ggml-q4_0.bin
+
+# Stage 3 - final runtime image
+# Grab a fresh copy of the Python image
+FROM python-slim-bullseye
+
+# Install runtime dependencies
+RUN apt-get update && \
+    apt-get install -y \
+    libopenblas-dev
+
+# Include global arg in this stage of the build
+ARG FUNCTION_DIR
+# Set working directory to function root directory
+WORKDIR ${FUNCTION_DIR}
+# Copy in the built dependencies
+COPY --from=build-image ${FUNCTION_DIR} ${FUNCTION_DIR}
+# (Optional) Add Lambda Runtime Interface Emulator and use a script in the ENTRYPOINT for simpler local runs
+ADD https://github.com/aws/aws-lambda-runtime-interface-emulator/releases/latest/download/aws-lambda-rie /usr/bin/aws-lambda-rie
+COPY entry.sh /
+RUN chmod 755 /usr/bin/aws-lambda-rie /entry.sh
+ENTRYPOINT [ "/entry.sh" ]
+CMD [ "llama_cpp.server.aws.handler" ]
diff --git a/entry.sh b/entry.sh
new file mode 100644
index 000000000..a608361e2
--- /dev/null
+++ b/entry.sh
@@ -0,0 +1,6 @@
+#!/bin/sh
+if [ -z "${AWS_LAMBDA_RUNTIME_API}" ]; then
+    exec /usr/bin/aws-lambda-rie /usr/local/bin/python -m awslambdaric $1
+else
+    exec /usr/local/bin/python -m awslambdaric $1
+fi
diff --git a/llama_cpp/server/aws.py b/llama_cpp/server/aws.py
new file mode 100644
index 000000000..46629480c
--- /dev/null
+++ b/llama_cpp/server/aws.py
@@ -0,0 +1,9 @@
+"""AWS Lambda function for llama.cpp.
+""" +from mangum import Mangum +from llama_cpp.server.app import create_app, Settings +import os + +print("os.cpu_count()", os.cpu_count()) +handler = Mangum(create_app( + Settings(n_threads=os.cpu_count(), embedding=False))) diff --git a/prompt.json b/prompt.json new file mode 100644 index 000000000..c08f9f7c4 --- /dev/null +++ b/prompt.json @@ -0,0 +1,6 @@ +{ + "prompt": [ + "\n\n### Instructions:\nWhat is the capital of France?\n\n### Response:\n" + ], + "stop": ["\n", "###"] +} diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 000000000..e2b91c07c --- /dev/null +++ b/requirements.txt @@ -0,0 +1,7 @@ +typing_extensions +fastapi +sse-starlette +mangum +numpy +diskcache +pydantic_settings \ No newline at end of file diff --git a/serverless.yml b/serverless.yml new file mode 100644 index 000000000..d370c0cb1 --- /dev/null +++ b/serverless.yml @@ -0,0 +1,43 @@ +service: llama-cpp + +frameworkVersion: "3" + +provider: + name: aws + deploymentMethod: direct + # REF: https://www.serverless.com/blog/container-support-for-lambda + ecr: + # In this section you can define images that will be built locally and uploaded to ECR + images: + appimage: + path: ./ + file: Dockerfile.aws + stage: dev + region: ap-southeast-1 + iam: + role: + statements: + - Effect: "Allow" + Action: + - "lambda:InvokeFunction" + Resource: "*" + +functions: + chat: + image: + name: appimage + memorySize: 10240 + environment: + MODEL: ./model/ggml-q4_0.bin + timeout: + 900 + # https://www.serverless.com/framework/docs/providers/aws/guide/functions#lambda-function-urls + url: + # Allow CORS for all requests from any origin + cors: + allowedOrigins: + - "*" + #- https://url1.com + #- https://url2.com + allowedMethods: + - POST diff --git a/vendor/llama.cpp b/vendor/llama.cpp index f5bfea058..edcc7ae7d 160000 --- a/vendor/llama.cpp +++ b/vendor/llama.cpp @@ -1 +1 @@ -Subproject commit f5bfea0580e417f99850d5456ca541d871a3e48c +Subproject commit edcc7ae7d26007bbf83136e9d33f863fcad9b871