Python
Semantic search example:

```python
import requests
import json

url = "http://localhost/semanticsearch/search"

payload = json.dumps({
    "api_key": "",
    "provider": "openai",
    "input": {
        "pivot_item": "Hello from OpenAI!",
        "search_array": [
            "Greetings from OpenAI!",
            "Bonjour de OpenAI!",
            "Hola desde OpenAI!"
        ],
        "number_of_matches": 2
    }
})

headers = {
    'X-API-KEY': '<microservice-key>',
    'Content-Type': 'application/json'
}

response = requests.request("POST", url, headers=headers, data=payload)
print(response.text)
```

Chatbot example:

```python
import requests
import json

url = "http://localhost/chatbot/chat"

payload = json.dumps({
    "api_key": "",
    "model": "gpt4",
    "provider": "openai",
    "input": {
        "system": "You are a helpful assistant.",
        "messages": [
            {"role": "user", "content": "recommend a vegetarian dish for dinner"}
        ]
    }
})

headers = {
    'X-API-KEY': '<microservice-key>',
    'Content-Type': 'application/json'
}

response = requests.request("POST", url, headers=headers, data=payload)
print(response.text)
```

You can also use a self-hosted vLLM model:
```python
import requests

url = "http://localhost/chatbot/chat"

payload = {
    "provider": "vllm",
    "serverLink": "<vllm-server-url>",
    "model": "mistralai/Mistral-7B-Instruct-v0.2",
    "input": {
        "system": "You are a helpful assistant.",
        "messages": [
            {"role": "user", "content": "Explain quantum computing briefly."}
        ]
    }
}

headers = {
    'X-API-KEY': 'root',
    'Content-Type': 'application/json'
}

response = requests.post(url, json=payload, headers=headers)
print(response.json())
```

You can also run evaluations across multiple models and select the most suitable model for your use case based on quantitative methods.
Below is an example that compares the Llama 13b-chat, OpenAI GPT-3.5, and Cohere Command models.
```python
import requests
import json

url = "http://localhost/evaluate/llm"

payload = json.dumps({
    "userInput": "User input or question.",
    "targetAnswers": [
        "optimal answer example1.",
        "optimal answer example2.",
        "optimal answer example3."
    ],
    "semantic": {
        "api_key": "",
        "provider": "openai"
    },
    "evaluate": [
        {
            "apiKey": "",
            "provider": "replicate",
            "type": "chat",
            "model": "13b-chat",
            "maxTokens": 50
        },
        {
            "apiKey": "",
            "provider": "cohere",
            "type": "completion",
            "model": "command",
            "maxTokens": 50
        },
        {
            "apiKey": "",
            "provider": "openai",
            "type": "chat",
            "model": "gpt-3.5-turbo",
            "maxTokens": 50,
            "temperature": 0.7
        }
    ]
})

headers = {
    'X-API-KEY': '<microservice-key>',
    'Content-Type': 'application/json'
}

response = requests.request("POST", url, headers=headers, data=payload)
print(response.text)
```

Snapshot of the expected output format:
{"openai/gpt-3.5-turbo": [{"prediction": "Photosynthesis is how plants make food for themselves....", "score_cosine_similarity": 0.9566836802012463, "score_euclidean_distance": 0.29175853870023755 }], "cohere/command": [{"prediction": "Photosynthesis is the process by which plants use the energy .....", "score_cosine_similarity": 0.9378139154300577, "score_euclidean_distance": 0.3512465738424273 }], "replicate/13b-chat": [{"prediction": "Here's an explanation of photosynthesis in simple terms .....", "score_cosine_similarity": 0.9096764395396765, "score_euclidean_distance": 0.4248874961328429 }], "lookup":{"cosine_similarity": "a value closer to 1 indicates a higher degree of similarity between two vectors", "euclidean_distance": "the lower the value, the closer the two points" } }Replace the <microservice-key> with the provided microservice key when running the docker image.