I'm working with a PyTorch model (AutoModelForCausalLM) from the transformers library and I'm running into a RuntimeError related to tensor types and operator support. Here is a simplified version of my code:
import torch
import requests
from PIL import Image
from IPython.display import display
from transformers import AutoModelForCausalLM, LlamaTokenizer
# Load tokenizer and model
tokenizer = LlamaTokenizer.from_pretrained('lmsys/vicuna-7b-v1.5')
model = AutoModelForCausalLM.from_pretrained(
'THUDM/cogvlm-chat-hf',
torch_dtype=torch.float16, # Using torch.float16
low_cpu_mem_usage=True,
trust_remote_code=True
).eval()
def generate(query: str, img_url: str, max_length: int = 2048) -> str:
image = Image.open(requests.get(img_url, stream=True).raw).convert('RGB')
display(image)
# Generate token inputs
inputs = model.build_conversation_input_ids(tokenizer, query=query, history=[], images=[image], template_version='vqa')
# Convert tensors to appropriate types
input_ids = inputs['input_ids'].unsqueeze(0).to(torch.long)
token_type_ids = inputs['token_type_ids'].unsqueeze(0).to(torch.long)
attention_mask = inputs['attention_mask'].unsqueeze(0).to(torch.float16)
images = [[inputs['images'][0].to(torch.float16)]]
inputs = {
'input_ids': input_ids,
'token_type_ids': token_type_ids,
'attention_mask': attention_mask,
'images': images,
}
gen_kwargs = {"max_length": max_length, "do_sample": False}
with torch.no_grad():
outputs = model.generate(**inputs, **gen_kwargs)
outputs = outputs[:, input_ids.shape[1]:]
return tokenizer.decode(outputs[0])
query = 'Describe this image in detail'
img_url = 'https://i.ibb.co/x1nH9vr/Slide1.jpg'
generate(query, img_url)
The code above raises the following error:
NotImplementedError: No operator found for `memory_efficient_attention_forward` with inputs:
query : shape=(1, 1226, 16, 112) (torch.float16)
key : shape=(1, 1226, 16, 112) (torch.float16)
value : shape=(1, 1226, 16, 112) (torch.float16)
attn_bias : <class 'NoneType'>
p : 0.0
`ck_decoderF` is not supported because:
device=cpu (supported: {'cuda'})
operator wasn't built - see `python -m xformers.info` for more info
`ckF` is not supported because:
device=cpu (supported: {'cuda'})
operator wasn't built - see `python -m xformers.info` for more info
I am trying to run PyTorch tensors in torch.float16 on the CPU (device=cpu). The model was loaded in torch.float16 via AutoModelForCausalLM from the transformers library. However, I get a NotImplementedError telling me that the memory_efficient_attention_forward operator is not supported for torch.float16 on the CPU.
Is there a way to make memory_efficient_attention_forward work with torch.float16 on the CPU? Are there alternative approaches or configurations I could consider to work around this?
I am trying to run this on a MacBook Pro with an Intel Core i7 processor.
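Judging from the trace alone (the post itself doesn't confirm this), every xformers kernel that gets probed reports device=cpu (supported: {'cuda'}): memory_efficient_attention_forward simply has no CPU build, regardless of dtype. So no configuration will make the fp16 xformers path work on an Intel MacBook; the realistic options are to run on a CUDA GPU, or to avoid the fp16 path entirely. Below is a minimal sketch of the latter, under the assumption that CogVLM's remote code only hits the fp16 kernels when the model is loaded in half precision:
import torch
from transformers import AutoModelForCausalLM, LlamaTokenizer

# CPU-only fallback sketch (assumption: float32 avoids the fp16-only
# attention kernels): keep the model and every input tensor in float32.
tokenizer = LlamaTokenizer.from_pretrained('lmsys/vicuna-7b-v1.5')
model = AutoModelForCausalLM.from_pretrained(
    'THUDM/cogvlm-chat-hf',
    torch_dtype=torch.float32,  # float32 instead of float16 on CPU
    low_cpu_mem_usage=True,
    trust_remote_code=True,
).eval()
# The tensors built in generate() must match, e.g.:
# attention_mask = inputs['attention_mask'].unsqueeze(0).to(torch.float32)
# images = [[inputs['images'][0].to(torch.float32)]]
If the remote code still routes through xformers in float32, this won't be enough and the model effectively requires a CUDA device; note also that float32 CPU inference for a model of this size will be extremely slow and memory-hungry.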
I have run the training process, but evaluation only runs at the last checkpoint step. Can someone help me with the settings so that checkpoints are assigned every 100 steps and the evaluation graph shows up on TensorBoard?
I tried evaluating every 100 steps, but it still only evaluates at the last checkpoint. I also tried using save_checkpoints_steps, but it isn't supported and I don't know why.
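The post doesn't say which training API is being used, but save_checkpoints_steps belongs to TensorFlow's tf.estimator.RunConfig, so here is a minimal sketch under that assumption. train_and_evaluate() re-runs evaluation every time a new checkpoint appears, so writing a checkpoint every 100 steps is what makes the eval curve show up in TensorBoard (my_model_fn, train_input_fn, and eval_input_fn are hypothetical placeholders, not from the post):
import tensorflow as tf

# Hypothetical sketch assuming the tf.estimator API.
run_config = tf.estimator.RunConfig(
    model_dir='./model_dir',     # TensorBoard reads event files from here
    save_checkpoints_steps=100,  # write a checkpoint every 100 steps
)
estimator = tf.estimator.Estimator(model_fn=my_model_fn, config=run_config)
train_spec = tf.estimator.TrainSpec(input_fn=train_input_fn, max_steps=10000)
eval_spec = tf.estimator.EvalSpec(
    input_fn=eval_input_fn,
    throttle_secs=0,  # evaluate as soon as each new checkpoint lands
)
# Evaluation now runs at every new checkpoint, i.e. every 100 steps.
tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)
If this is actually the Hugging Face Trainer instead, the equivalent knobs are save_steps and eval_steps with evaluation_strategy="steps".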
I am very new to Python, but I wanted to try to build a cool vision system for a project I'm doing on a car. Originally I was going to use an Arducam IMX462 with a Coral TPU, but I ran into problems with the Pi 5 and could not get the Coral TPU working on Python 3.11 no matter what I tried. So we have changed course and are now using a thermal camera, which I think will be better in the long run. It also means I can go back to the Raspberry Pi 4, get the USB TPU working, and get roughly 15-20 fps, which should be enough for this project. Since there is now native support for the thermal camera, I'm using a program written for this exact model (a Topdon TC001), and it works well. My only problem is that when I add the object-detection part, I get an error I'm not familiar with.
This is the error I get: RuntimeError: Given groups=1, weight of size [16, 3, 3, 3], expected input[1, 2, 480, 640] to have 3 channels, but got 2 channels instead
I think it may have something to do with how the thermal image is processed, but I'm not 100% sure. I hope this route works out, because it would be great at night, and night is the only time I really need the object detection to work. Any help figuring out why this error occurs would be much appreciated. I realize you probably don't have this thermal camera to test with yourselves, but any help is still appreciated.
The source code for the thermal camera is on GitHub.
Apologies if the code doesn't display correctly; I've never used Stack Overflow before.
import cv2
import numpy as np
import argparse
import time
import io
from ultralytics import YOLO
#We need to know if we are running on the Pi, because openCV behaves a little oddly on all the builds!
#https://raspberrypi.stackexchange.com/questions/5100/detect-that-a-python-program-is-running-on-the-pi
def is_raspberrypi():
try:
with io.open('/sys/firmware/devicetree/base/model', 'r') as m:
if 'raspberry pi' in m.read().lower(): return True
except Exception: pass
return False
isPi = is_raspberrypi()
parser = argparse.ArgumentParser()
parser.add_argument("--device", type=int, default=0, help="Video Device number e.g. 0, use v4l2-ctl --list-devices")
args = parser.parse_args()
if args.device:
dev = args.device
else:
dev = 0
#init video
cap = cv2.VideoCapture('/dev/video'+str(dev), cv2.CAP_V4L)
#cap = cv2.VideoCapture(0)
#pull in the video but do NOT automatically convert to RGB, else it breaks the temperature data!
#https://stackoverflow.com/questions/63108721/opencv-setting-videocap-property-to-cap-prop-convert-rgb-generates-weird-boolean
if isPi == True:
cap.set(cv2.CAP_PROP_CONVERT_RGB, 0.0)
else:
cap.set(cv2.CAP_PROP_CONVERT_RGB, False)
#256x192 General settings
width = 256 #Sensor width
height = 192 #sensor height
scale = 3 #scale multiplier
newWidth = width*scale
newHeight = height*scale
alpha = 1.0 # Contrast control (1.0-3.0)
colormap = 5
font=cv2.FONT_HERSHEY_SIMPLEX
dispFullscreen = False
cv2.namedWindow('Thermal',cv2.WINDOW_GUI_NORMAL)
cv2.resizeWindow('Thermal', newWidth,newHeight)
rad = 0 #blur radius
threshold = 2
hud = False
recording = False
elapsed = "00:00:00"
snaptime = "None"
# Initialize YOLO model
model = YOLO('yolov8n.pt')
def rec():
now = time.strftime("%Y%m%d--%H%M%S")
#do NOT use mp4 here, it is flakey!
videoOut = cv2.VideoWriter(now+'output.avi', cv2.VideoWriter_fourcc(*'XVID'),25, (newWidth,newHeight))
return(videoOut)
def snapshot(heatmap):
#I would put colons in here, but Windows throws a fit if you try and open them!
now = time.strftime("%Y%m%d-%H%M%S")
snaptime = time.strftime("%H:%M:%S")
cv2.imwrite("TC001"+now+".png", heatmap)
return snaptime
while(cap.isOpened()):
# Capture frame-by-frame
ret, frame = cap.read()
if ret == True:
imdata,thdata = np.array_split(frame, 2)
#now parse the data from the bottom frame and convert to temp!
#https://www.eevblog.com/forum/thermal-imaging/infiray-and-their-p2-pro-discussion/200/
#Huge props to LeoDJ for figuring out how the data is stored and how to compute temp from it.
#grab data from the center pixel...
hi = thdata[96][128][0]
lo = thdata[96][128][1]
#print(hi,lo)
lo = lo*256
rawtemp = hi+lo
#print(rawtemp)
temp = (rawtemp/64)-273.15
temp = round(temp,2)
#print(temp)
#break
#find the max temperature in the frame
lomax = thdata[...,1].max()
posmax = thdata[...,1].argmax()
#since argmax returns a linear index, convert back to row and col
mcol,mrow = divmod(posmax,width)
himax = thdata[mcol][mrow][0]
lomax=lomax*256
maxtemp = himax+lomax
maxtemp = (maxtemp/64)-273.15
maxtemp = round(maxtemp,2)
#find the lowest temperature in the frame
lomin = thdata[...,1].min()
posmin = thdata[...,1].argmin()
#since argmin returns a linear index, convert back to row and col
lcol,lrow = divmod(posmin,width)
himin = thdata[lcol][lrow][0]
lomin=lomin*256
mintemp = himin+lomin
mintemp = (mintemp/64)-273.15
mintemp = round(mintemp,2)
#find the average temperature in the frame
loavg = thdata[...,1].mean()
hiavg = thdata[...,0].mean()
loavg=loavg*256
avgtemp = loavg+hiavg
avgtemp = (avgtemp/64)-273.15
avgtemp = round(avgtemp,2)
# Convert the real image half to BGR
bgr = cv2.cvtColor(imdata, cv2.COLOR_YUV2BGR_YUYV)
# Contrast
bgr = cv2.convertScaleAbs(bgr, alpha=alpha) # Contrast
# bicubic interpolate, upscale and blur
bgr = cv2.resize(bgr, (newWidth, newHeight), interpolation=cv2.INTER_CUBIC) # Scale up!
if rad > 0:
bgr = cv2.blur(bgr, (rad, rad))
# YOLO object detection
detections = model(imdata, stream=False)
if colormap == 5:
heatmap = cv2.applyColorMap(bgr, cv2.COLORMAP_BONE)
cmapText = 'Bone'
for detection in detections:
x, y, w, h, conf, cls = detection
cv2.rectangle(imdata, (x, y), (x+w, y+h), (0, 255, 0), 2)
cv2.putText(imdata, f'{cls}: {conf:.2f}', (x, y-10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
cv2.line(heatmap, (int(newWidth / 2), int(newHeight / 2) + 20), \
(int(newWidth / 2), int(newHeight / 2) - 20), (255, 255, 255), 2) # vline
cv2.line(heatmap, (int(newWidth / 2) + 20, int(newHeight / 2)), \
(int(newWidth / 2) - 20, int(newHeight / 2)), (255, 255, 255), 2) # hline
cv2.line(heatmap, (int(newWidth / 2), int(newHeight / 2) + 20), \
(int(newWidth / 2), int(newHeight / 2) - 20), (0, 0, 0), 1) # vline
cv2.line(heatmap, (int(newWidth / 2) + 20, int(newHeight / 2)), \
(int(newWidth / 2) - 20, int(newHeight / 2)), (0, 0, 0), 1) # hline
# show temp
cv2.putText(heatmap, str(temp) + ' C', (int(newWidth / 2) + 10, int(newHeight / 2) - 10), \
cv2.FONT_HERSHEY_SIMPLEX, 0.45, (0, 0, 0), 2, cv2.LINE_AA)
cv2.putText(heatmap, str(temp) + ' C', (int(newWidth / 2) + 10, int(newHeight / 2) - 10), \
cv2.FONT_HERSHEY_SIMPLEX, 0.45, (0, 255, 255), 1, cv2.LINE_AA)
#display floating max temp
if maxtemp > avgtemp+threshold:
cv2.circle(heatmap, (mrow*scale, mcol*scale), 5, (0,0,0), 2)
cv2.circle(heatmap, (mrow*scale, mcol*scale), 5, (0,0,255), -1)
cv2.putText(heatmap,str(maxtemp)+' C', ((mrow*scale)+10, (mcol*scale)+5),\
cv2.FONT_HERSHEY_SIMPLEX, 0.45,(0,0,0), 2, cv2.LINE_AA)
cv2.putText(heatmap,str(maxtemp)+' C', ((mrow*scale)+10, (mcol*scale)+5),\
cv2.FONT_HERSHEY_SIMPLEX, 0.45,(0, 255, 255), 1, cv2.LINE_AA)
#display floating min temp
if mintemp < avgtemp-threshold:
cv2.circle(heatmap, (lrow*scale, lcol*scale), 5, (0,0,0), 2)
cv2.circle(heatmap, (lrow*scale, lcol*scale), 5, (255,0,0), -1)
cv2.putText(heatmap,str(mintemp)+' C', ((lrow*scale)+10, (lcol*scale)+5),\
cv2.FONT_HERSHEY_SIMPLEX, 0.45,(0,0,0), 2, cv2.LINE_AA)
cv2.putText(heatmap,str(mintemp)+' C', ((lrow*scale)+10, (lcol*scale)+5),\
cv2.FONT_HERSHEY_SIMPLEX, 0.45,(0, 255, 255), 1, cv2.LINE_AA)
#display image
cv2.imshow('Thermal',heatmap)
'''
if recording == True:
elapsed = (time.time() - start)
elapsed = time.strftime("%H:%M:%S", time.gmtime(elapsed))
#print(elapsed)
videoOut.write(heatmap)
keyPress = cv2.waitKey(1)
if keyPress == ord('a'): #Increase blur radius
rad += 1
if keyPress == ord('z'): #Decrease blur radius
rad -= 1
if rad <= 0:
rad = 0
if keyPress == ord('s'): #Increase threshold
threshold += 1
if keyPress == ord('x'): #Decrease threshold
threshold -= 1
if threshold <= 0:
threshold = 0
if keyPress == ord('d'): #Increase scale
scale += 1
if scale >=5:
scale = 5
newWidth = width*scale
newHeight = height*scale
if dispFullscreen == False and isPi == False:
cv2.resizeWindow('Thermal', newWidth,newHeight)
if keyPress == ord('c'): #Decrease scale
scale -= 1
if scale <= 1:
scale = 1
newWidth = width*scale
newHeight = height*scale
if dispFullscreen == False and isPi == False:
cv2.resizeWindow('Thermal', newWidth,newHeight)
if keyPress == ord('q'): #enable fullscreen
dispFullscreen = True
cv2.namedWindow('Thermal',cv2.WND_PROP_FULLSCREEN)
cv2.setWindowProperty('Thermal',cv2.WND_PROP_FULLSCREEN,cv2.WINDOW_FULLSCREEN)
if keyPress == ord('w'): #disable fullscreen
dispFullscreen = False
cv2.namedWindow('Thermal',cv2.WINDOW_GUI_NORMAL)
cv2.setWindowProperty('Thermal',cv2.WND_PROP_AUTOSIZE,cv2.WINDOW_GUI_NORMAL)
cv2.resizeWindow('Thermal', newWidth,newHeight)
if keyPress == ord('f'): #contrast+
alpha += 0.1
alpha = round(alpha,1)#fix round error
if alpha >= 3.0:
alpha=3.0
if keyPress == ord('v'): #contrast-
alpha -= 0.1
alpha = round(alpha,1)#fix round error
if alpha<=0:
alpha = 0.0
if keyPress == ord('h'):
if hud==True:
hud=False
elif hud==False:
hud=True
if keyPress == ord('m'): #m to cycle through color maps
colormap += 1
if colormap == 11:
colormap = 0
if keyPress == ord('r') and recording == False: #r to start recording
videoOut = rec()
recording = True
start = time.time()
if keyPress == ord('t'): #t to finish recording
recording = False
elapsed = "00:00:00"
if keyPress == ord('p'): #p to take a snapshot
snaptime = snapshot(heatmap)
if keyPress == ord('q'):
break
cap.release()
cv2.destroyAllWindows()
'''
Problem solved. I was overthinking what needed to be done. Since I'm not interested in any of the temperature data, such as reading the max or min temperature, I didn't need to build on the code posted above. I was able to use the USB thermal camera with just a little code to get rid of the doubled image that was being displayed. If anyone knows why the code above doesn't work, I'm still curious.
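For what it's worth, a likely explanation (an assumption based on the error message, not verified on the TC001 itself): with CAP_PROP_CONVERT_RGB disabled, the image half of the frame (imdata) is raw YUYV data with 2 channels per pixel, while YOLOv8's first convolution expects 3 channels (that is the weight of size [16, 3, 3, 3] in the error). The display path already converts with cv2.cvtColor(..., cv2.COLOR_YUV2BGR_YUYV), but the detection call passes the raw imdata. A minimal sketch of the fix:
import cv2
import numpy as np
from ultralytics import YOLO

model = YOLO('yolov8n.pt')
cap = cv2.VideoCapture(0)               # device number assumed for illustration
cap.set(cv2.CAP_PROP_CONVERT_RGB, 0.0)  # keep raw data, as in the original script

ret, frame = cap.read()
if ret:
    imdata, thdata = np.array_split(frame, 2)
    # Convert the 2-channel YUYV image to 3-channel BGR *before* detection,
    # exactly as the display path already does.
    bgr = cv2.cvtColor(imdata, cv2.COLOR_YUV2BGR_YUYV)
    results = model(bgr, stream=False)
    # Ultralytics returns Results objects, not flat (x, y, w, h, conf, cls)
    # tuples, so boxes are read via the .boxes attribute:
    for r in results:
        for box in r.boxes:
            x1, y1, x2, y2 = map(int, box.xyxy[0])
            label = f'{model.names[int(box.cls[0])]}: {float(box.conf[0]):.2f}'
            cv2.rectangle(bgr, (x1, y1), (x2, y2), (0, 255, 0), 2)
            cv2.putText(bgr, label, (x1, y1 - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
This would also replace the x, y, w, h, conf, cls = detection unpacking in the posted loop, which doesn't match what ultralytics actually returns.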