大语言模型 Phi-3 LLMs
生成代码 Granite Code
生成文本向量 Generate Embeddings for Text Retrieval (agent-blueprints)
视觉语义分割ChangeNet
光学字符检测OCDRNet
统计图表理解deplot(agent-blueprints)
轻量级、具有强大数学和逻辑推理能力的大语言模型
https://build.nvidia.com/microsoft/phi-3-mini-4k
其强大的推理能力和逻辑能力使其成为内容生成、摘要、问答和情感分析任务的理想选择
# 导入OpenAI库
from openai import OpenAI
# Build a client for NVIDIA's OpenAI-compatible API service.
# The api_key placeholder has to be replaced with a real key when this
# script is executed outside NGC.
client = OpenAI(
    api_key="$API_KEY_REQUIRED_IF_EXECUTING_OUTSIDE_NGC",
    base_url="https://integrate.api.nvidia.com/v1",
)

# Request a limerick about GPU computing from
# microsoft/phi-3-mini-4k-instruct.
#   temperature - controls randomness of the output
#   top_p       - controls diversity of the output
#   max_tokens  - upper bound on the number of generated tokens
#   stream      - True, so the answer is delivered chunk by chunk
request_options = {
    "model": "microsoft/phi-3-mini-4k-instruct",
    "messages": [
        {
            "role": "user",
            "content": "Write a limerick about the wonders of GPU computing.",
        }
    ],
    "temperature": 0.2,
    "top_p": 0.7,
    "max_tokens": 1024,
    "stream": True,
}
completion = client.chat.completions.create(**request_options)

# Print every streamed fragment as it arrives; chunks whose delta carries
# no text are skipped.
for chunk in completion:
    fragment = chunk.choices[0].delta.content
    if fragment is None:
        continue
    print(fragment, end="")
https://build.nvidia.com/ibm/granite-34b-code-instruct
# 导入OpenAI库
from openai import OpenAI
# Create the OpenAI client pointed at NVIDIA's API service.
# The API key placeholder must be replaced with a real key when running
# outside NGC.
client = OpenAI(
    api_key="$API_KEY_REQUIRED_IF_EXECUTING_OUTSIDE_NGC",
    base_url="https://integrate.api.nvidia.com/v1",
)

# Ask ibm/granite-34b-code-instruct to write a Python factorial function.
user_prompt = {
    "role": "user",
    "content": "Write a Python function to calculate the factorial of a number.",
}

# Parameters of the request:
#   model       - name of the pretrained model to use
#   messages    - the conversation so far (role + content of each turn)
#   temperature - sampling randomness; lower is more deterministic,
#                 higher is more random
#   top_p       - nucleus-sampling cutoff; only tokens inside this
#                 cumulative probability mass are considered
#   max_tokens  - cap on the number of generated tokens
#   stream      - whether the result is delivered as a stream
completion = client.chat.completions.create(
    stream=True,
    max_tokens=1024,
    top_p=1,
    temperature=0.5,
    messages=[user_prompt],
    model="ibm/granite-34b-code-instruct",
)

# Consume the stream and print the generated code as soon as each piece
# is received.
for chunk in completion:
    text = chunk.choices[0].delta.content
    if text is not None:
        print(text, end="")
https://build.nvidia.com/nvidia/nv-embedqa-e5-v5
# 导入OpenAI库
from openai import OpenAI
# Create the OpenAI client: an API key for authentication plus the base
# URL of NVIDIA's API service.
client = OpenAI(
    base_url="https://integrate.api.nvidia.com/v1",
    api_key="$API_KEY_REQUIRED_IF_EXECUTING_OUTSIDE_NGC",
)

# Texts to embed, and the extra request fields (input type and truncation
# option) that are forwarded in the request body.
texts = ["What is the capital of France?"]
embedding_options = {"input_type": "query", "truncate": "NONE"}

# Turn the text into a numeric vector with nvidia/nv-embedqa-e5-v5.
response = client.embeddings.create(
    model="nvidia/nv-embedqa-e5-v5",
    input=texts,
    encoding_format="float",
    extra_body=embedding_options,
)

# Only the embedding vector of the first result is printed.
first_vector = response.data[0].embedding
print(first_vector)
https://build.nvidia.com/nvidia/visual-changenet?snippet_tab=Python
该模型可以检测两幅图像之间的像素级变化并生成变化映射图,同时对图片进行语义分割
import os
import sys
import uuid
import zipfile
import requests
# Inference endpoint of the Visual ChangeNet model.
nvai_url = "https://ai.api.nvidia.com/v1/cv/nvidia/visual-changenet"
# Value of the Authorization header; the placeholder has to be replaced
# with a real API key when executing outside NGC.
header_auth = "Bearer $API_KEY_REQUIRED_IF_EXECUTING_OUTSIDE_NGC"
def _upload_asset(input, description):
    """
    Upload a binary asset through the NVCF assets API.

    The upload is a two-step exchange: the assets endpoint is first asked
    for an upload URL and an asset id, then the payload is PUT to that URL.

    :param input: The binary asset to upload (sent as the PUT body)
    :param description: A description of the asset
    :return: The ``uuid.UUID`` built from the ``assetId`` of the reply
    :raises requests.HTTPError: if either request reports an error status
    """
    content_type = "image/jpeg"

    # Step 1: ask the NVCF API for an upload slot.
    grant_response = requests.post(
        "https://api.nvcf.nvidia.com/v2/nvcf/assets",
        headers={
            "Authorization": header_auth,
            "Content-Type": "application/json",
            "accept": "application/json",
        },
        json={"contentType": content_type, "description": description},
        timeout=30,
    )
    grant_response.raise_for_status()
    grant = grant_response.json()

    # Step 2: send the asset bytes to the URL handed back in step 1.
    upload_response = requests.put(
        grant["uploadUrl"],
        data=input,
        headers={
            "x-amz-meta-nvcf-asset-description": description,
            "content-type": content_type,
        },
        timeout=300,
    )
    upload_response.raise_for_status()

    return uuid.UUID(grant["assetId"])
if __name__ == "__main__":
    # Script entry point: upload a reference image and a test image to the
    # NVCF API, ask the Visual ChangeNet model to compare them, and unpack
    # the ZIP reply into <output_dir>.
    if len(sys.argv) != 4:
        print("Usage: python test.py <reference_image> <test_image> <output_dir>")
        sys.exit(1)
    reference_path, test_path, output_dir = sys.argv[1:4]
    archive_path = f"{output_dir}.zip"

    # Upload the local images. The `with` blocks make sure each file handle
    # is closed once its upload has finished (or failed).
    with open(reference_path, "rb") as reference_file:
        asset_id1 = _upload_asset(reference_file, "Reference Image")
    with open(test_path, "rb") as test_file:
        asset_id2 = _upload_asset(test_file, "Test Image")

    # Request body and the asset references that accompany it.
    inputs = {"reference_image": f"{asset_id1}", "test_image": f"{asset_id2}"}
    asset_list = f"{asset_id1} , {asset_id2}"
    headers = {
        "Content-Type": "application/json",
        "NVCF-INPUT-ASSET-REFERENCES": asset_list,
        "NVCF-FUNCTION-ASSET-IDS": asset_list,
        "Authorization": header_auth,
    }

    # Send the request to the NVCF API. An HTTP error is raised here,
    # instead of writing the error body to disk and failing later when it
    # turns out not to be a ZIP archive.
    response = requests.post(nvai_url, headers=headers, json=inputs)
    response.raise_for_status()

    # Save the response content to a ZIP file next to the output directory.
    with open(archive_path, "wb") as out:
        out.write(response.content)

    # Extract the ZIP file contents to the output directory.
    with zipfile.ZipFile(archive_path, "r") as z:
        z.extractall(output_dir)

    # Notify the user of the completed operation.
    print(f"Response saved to {output_dir}")
    print(os.listdir(output_dir))
https://build.nvidia.com/nvidia/ocdrnet?snippet_tab=Python
用于光学字符检测和识别的预训练模型
import os
import sys
import uuid
import zipfile
import requests
# Inference endpoint of the OCDRNet model on the NVCF API.
nvai_url = "https://ai.api.nvidia.com/v1/cv/nvidia/ocdrnet"
# Value of the Authorization header. The API key is taken from the
# NGC_PERSONAL_API_KEY environment variable; when that variable is not set
# the placeholder is kept, so it can still be replaced by hand.
header_auth = "Bearer " + os.getenv(
    "NGC_PERSONAL_API_KEY", "$API_KEY_REQUIRED_IF_EXECUTING_OUTSIDE_NGC"
)
def _upload_asset(input, description):
    """
    Upload one asset through the NVCF assets API.

    :param input: The binary asset to upload (sent as the PUT body)
    :param description: A description of the asset
    :return: The asset id as a ``uuid.UUID`` object
    :raises requests.HTTPError: if either request reports an error status
    """
    # POST to the assets endpoint to obtain the upload URL and the asset id.
    grant_response = requests.post(
        "https://api.nvcf.nvidia.com/v2/nvcf/assets",
        headers={
            "Authorization": header_auth,
            "Content-Type": "application/json",
            "accept": "application/json",
        },
        json={"contentType": "image/jpeg", "description": description},
        timeout=30,
    )
    grant_response.raise_for_status()
    grant = grant_response.json()
    upload_url = grant["uploadUrl"]
    asset_id = grant["assetId"]

    # PUT the asset bytes to the upload URL with the storage headers.
    upload_response = requests.put(
        upload_url,
        data=input,
        headers={
            "x-amz-meta-nvcf-asset-description": description,
            "content-type": "image/jpeg",
        },
        timeout=300,
    )
    upload_response.raise_for_status()

    return uuid.UUID(asset_id)
if __name__ == "__main__":
    # Script entry point: upload one image of your choosing to the NVCF API,
    # send it to the optical character detection and recognition model, and
    # unpack the ZIP reply into <output_dir>.
    # Authentication uses the module-level `header_auth` value.

    # Check the number of command-line arguments.
    if len(sys.argv) != 3:
        print("Usage: python test.py <image> <output_dir>")
        sys.exit(1)
    image_path, output_dir = sys.argv[1:3]
    archive_path = f"{output_dir}.zip"

    # Upload the asset and get its id. The `with` block makes sure the file
    # handle is closed once the upload has finished (or failed).
    with open(image_path, "rb") as image_file:
        asset_id = _upload_asset(image_file, "Input Image")

    # Request body, asset list and request headers.
    inputs = {"image": f"{asset_id}", "render_label": False}
    asset_list = f"{asset_id}"
    headers = {
        "Content-Type": "application/json",
        "NVCF-INPUT-ASSET-REFERENCES": asset_list,
        "NVCF-FUNCTION-ASSET-IDS": asset_list,
        "Authorization": header_auth,
    }

    # Send the POST request to the OCDRNet model. An HTTP error is raised
    # here, instead of writing the error body to disk and failing later
    # when it turns out not to be a ZIP archive.
    response = requests.post(nvai_url, headers=headers, json=inputs)
    response.raise_for_status()

    # Save the response content to a local ZIP file.
    with open(archive_path, "wb") as out:
        out.write(response.content)

    # Extract the ZIP file into the output directory.
    with zipfile.ZipFile(archive_path, "r") as z:
        z.extractall(output_dir)

    # Report the output directory and its contents.
    print(f"Output saved to {output_dir}")
    print(os.listdir(output_dir))
https://build.nvidia.com/google/google-deplot?snippet_tab=Python
将统计图表转换为 JSON 格式的结构化数据
# 导入requests库和base64编码库
import requests, base64
# Inference URL and the flag that selects streaming delivery of the reply.
invoke_url = "https://ai.api.nvidia.com/v1/vlm/google/deplot"
stream = True

# Open the image in binary mode and base64-encode it so it can be inlined
# in the prompt.
with open("economic-assistance-chart.png", "rb") as f:
    image_b64 = base64.b64encode(f.read()).decode()

# The encoded image must stay below 180,000 characters; anything larger has
# to go through the assets API. This is an explicit raise rather than an
# assert so the check still runs under `python -O`.
if len(image_b64) >= 180_000:
    raise ValueError("To upload larger images, use the assets API (see docs)")

# Request headers; an API key is needed for authentication.
headers = {
    "Authorization": "Bearer $API_KEY_REQUIRED_IF_EXECUTING_OUTSIDE_NGC",
    "Accept": "text/event-stream" if stream else "application/json"
}

# Request payload: the user message plus max tokens, temperature, etc.
payload = {
    "messages": [
        {
            "role": "user",
            "content": f'Generate underlying data table of the figure below: <img src="data:image/png;base64,{image_b64}" />'
        }
    ],
    "max_tokens": 1024,
    "temperature": 0.20,
    "top_p": 0.20,
    "stream": stream
}

# Send the POST request. Passing stream=stream lets requests hand lines
# over as they arrive instead of buffering the whole body first.
response = requests.post(invoke_url, headers=headers, json=payload, stream=stream)
# Stop on HTTP errors instead of printing an error body as if it were a
# model reply.
response.raise_for_status()

# Handle the response according to the streaming flag.
if stream:
    for line in response.iter_lines():
        if line:
            print(line.decode("utf-8"))
else:
    print(response.json())
将图片转换为表格数据库/CSV/JSON 形式存储,再结合 generate code 生成 SQL,或者结合大语言模型进行分析并给出总结
https://zhuanlan.zhihu.com/p/715083726
https://blog.csdn.net/weixin_44756966/article/details/141289212
https://mp.weixin.qq.com/s/_s6iuod9dyxYB-jTjYxWYQ