Author: 极客天成Scaleflash
Driver downloads:
The DPU OS ships with its own IB driver, and installing the IB driver also updates the card firmware, so the DPU firmware version must match the firmware bundled with the driver. In practice the DPU OS driver version and the host driver version may not line up exactly; use versions that are as close as possible.
If you cannot determine the DPU OS firmware version in advance, install any IB driver first, flash the desired DPU OS, check which driver version it ships, and then download the matching driver on the host to update the firmware.
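A quick way to compare the host driver version against the firmware on the card (a sketch; the mst device naming follows the BlueField-2 examples later in this article):
ofed_info -s          # OFED driver version installed on the host
mst start
mlxfwmanager --query  # firmware versions of the installed adapters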
IB NIC driver
DPU OS download
https://developer.nvidia.com/zh-cn/networking/doca
Initial configuration
Common system initialization settings
vim settings
cat >> /root/.vimrc << 'EOF'
set number
set tabstop=4
set shiftwidth=4
set ignorecase
set hlsearch
" toggle line numbers and the fold column (the mapped key <F2> is an assumption)
nnoremap <F2> :set nonumber!<CR>:set foldcolumn=0<CR>
" paste-mode toggle key (<F9> is an assumption)
set pastetoggle=<F9>
EOF
yum local repository
[root@bms-03 ~]# cat /etc/yum.repos.d/iso.repo
[redhat-7.8-iso]
name=redhat-7.8-iso
baseurl=file:///sf/iso/
enabled=1
priority=1
gpgcheck=0
[root@bms-03 ~]#
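For the baseurl above to work, the contents of the RHEL 7.8 ISO must be available under /sf/iso/; a minimal sketch (the ISO file name below is hypothetical):
mkdir -p /sf/iso
mount -o loop /path/to/rhel-server-7.8-x86_64-dvd.iso /sf/iso
yum clean all && yum makecache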
Install the driver
Driver installation
./mlnxofedinstall --with-nvmf --force-fw-update --force
Complete the remaining steps according to the installer's prompts.
If the installer cannot finish, you can set the DPU card parameters first and reboot for them to take effect.
Start rshim, which the host uses to access the DPU OS
systemctl start rshim
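With rshim running, the DPU is exposed under /dev/rshim0 (boot, console and misc nodes); a quick check:
ls /dev/rshim0/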
Build and install the driver
For non-standard systems, install the driver by compiling it against the local kernel.
RedHat 7.8 with a minimal install
yum install vim
yum install perl
yum install kernel-devel
yum install createrepo
yum install python-devel pciutils lsof redhat-rpm-config rpm-build gcc
yum install automake autoconf
yum install libtool
yum install libusbx tcl gcc-gfortran fuse-libs tcsh tk
Or install everything in one go:
yum install vim perl kernel-devel createrepo python-devel pciutils lsof redhat-rpm-config rpm-build gcc automake autoconf libtool libusbx tcl gcc-gfortran fuse-libs tcsh tk
./mlnxofedinstall --with-nvmf --add-kernel-support --fw-update-only --force
/tmp/MLNX_OFED_LINUX-5.2-2.2.3.0-3.10.0-1127.el7.x86_64/MLNX_OFED_LINUX-5.2-2.2.3.0-rhel7.8-ext/mlnxofedinstall --fw-update-only --force
dracut -f
systemctl start rshim
Flash the OS onto the DPU
Start the rshim service on the host
systemctl start rshim
Connect to the DPU console from the host
screen /dev/rshim0/console
Flash the OS image onto the DPU from the host
cat Ubuntu20.04-MLNX_OFED-5.2-2.2.0.0-3.5.1.11601-1-aarch64.bfb > /dev/rshim0/boot
Watch the DPU console and wait for the installation to finish
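Once installation finishes, a quick sanity check from the DPU console (assuming the Ubuntu 20.04 image above was flashed):
cat /etc/os-release   # confirm the DPU OS release
ofed_info -s          # confirm the bundled OFED driver version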
Configure the DPU
First start mst, then run the following commands.
If the wrong parameters are enabled the host may fail to boot; here NVME_EMULATION support is enabled.
mst start
mlxconfig -d /dev/mst/mt41686_pciconf0 s INTERNAL_CPU_MODEL=1 NVME_EMULATION_ENABLE=1
Power-cycle the host (this reboot can be deferred; finish configuring the DPU and reboot once at the end)
echo 3 > /proc/sys/vm/drop_caches
ipmitool power cycle
Reboot note (this must be a power-off reboot; otherwise parameters such as NVME_EMULATION_ENABLE=1 above do not take effect):
Power-off reboot of the host: ipmitool power cycle
Warm reboot (no power loss): ipmitool power reset
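After the power cycle you can confirm that the settings persisted (same mst device as above):
mst start
mlxconfig -d /dev/mst/mt41686_pciconf0 q | grep -E 'INTERNAL_CPU_MODEL|NVME_EMULATION'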
Configure the DPU OS
Connect to the DPU console from the host
systemctl restart rshim
screen /dev/rshim0/console
On the DPU, switch to the root user
sudo passwd root
su -
hostnamectl set-hostname roce-dpu-100
Configure root SSH login on the DPU
vim /etc/ssh/sshd_config
PermitRootLogin yes
StrictModes yes
echo -e "\nPermitRootLogin yes\n" >> /etc/ssh/sshd_config
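After changing sshd_config, restart the SSH service so root login takes effect (the service is named ssh on the Ubuntu-based DPU OS):
systemctl restart ssh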
Fix up the SNAP configuration on the DPU
mkdir /root/bak/ ; mv /etc/mlnx_snap/*.conf /root/bak/
systemctl restart mlnx_snap; systemctl status mlnx_snap
Stop and disable services that are not needed on the DPU
systemctl stop docker;systemctl disable docker
systemctl disable cloud-init-local cloud-init cloud-config cloud-final
systemctl stop cloud-init-local cloud-init cloud-config cloud-final
systemctl stop x11-common.service; systemctl disable x11-common.service
systemctl stop open-iscsi ;systemctl disable open-iscsi
systemctl stop nfs-client.target
systemctl stop nfs-common.service
systemctl stop nfs-config.service
systemctl stop nfs-idmapd.service
systemctl stop nfs-utils.service
systemctl disable nfs-client.target
systemctl disable nfs-common.service
systemctl disable nfs-config.service
systemctl disable nfs-idmapd.service
systemctl disable nfs-utils.service
systemctl stop multipathd;systemctl disable multipathd
Configure IP addresses (the netplan blocks below were recorded for different nodes; adapt the addresses and apply the one that matches your node)
mv /etc/netplan/50-cloud-init.yaml bak/
cat > /etc/netplan/99-ib.yaml << 'EOF'
network:
  version: 2
  renderer: networkd
  ethernets:
    ibp3s0f0:
      dhcp4: no
      addresses:
        - 192.168.6.214/24
    ibp3s0f1:
      dhcp4: no
      addresses:
        - 192.168.8.214/24
EOF
cat > /etc/netplan/60-mlnx.yaml << 'EOF'
network:
  version: 2
  renderer: networkd
  ethernets:
    ibp3s0f0:
      dhcp4: no
      addresses:
        - 192.168.6.213/23
    ibp3s0f1:
      dhcp4: no
      addresses:
        - 192.168.8.213/23
EOF
cat > /etc/netplan/60-mlnx.yaml << 'EOF'
network:
  version: 2
  renderer: networkd
  ethernets:
    ibp3s0f0:
      dhcp4: no
      addresses:
        - 192.168.1.14/23
    ibp3s0f1:
      dhcp4: no
      addresses:
        - 192.168.3.14/23
EOF
Make the changes above take effect
netplan apply
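Verify that the addresses came up (interface names taken from the examples above):
ip -br addr show | grep ibp3s0f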
With all configuration complete, power-cycle the host.
Run a reboot test and verify that all parameters persist.
echo 3 > /proc/sys/vm/drop_caches
ipmitool power cycle
Extras (not needed unless you have a specific reason)
Pin the kernel to prevent updates
apt-mark hold linux-image-4.15.0-20-generic
apt-mark hold linux-image-generic
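Confirm the hold is in place:
apt-mark showhold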
Configuration record and caveats
IB DPU
If the wrong parameters are enabled the host may fail to boot; enable parameters according to the features you need. Here NVME_EMULATION and VIRTIO_BLK_EMULATION support are enabled.
mlxconfig -d /dev/mst/mt41686_pciconf0 s INTERNAL_CPU_MODEL=1 NVME_EMULATION_ENABLE=1 VIRTIO_BLK_EMULATION_NUM_PF=1
root@dpu-213:~# mlxconfig -d /dev/mst/mt41686_pciconf0 s INTERNAL_CPU_MODEL=1 NVME_EMULATION_ENABLE=1 VIRTIO_NET_EMULATION_ENABLE=1 VIRTIO_BLK_EMULATION_NUM_PF=1
Device #1:
Device type: BlueField2
Name: MBF2H516A-EEEO_Ax_Bx
Description: BlueField-2 DPU 100GbE/EDR/HDR100 VPI Dual-Port QSFP56; PCIe Gen4 x16; Crypto Enabled; 16GB on-board DDR; 1GbE OOB management; FHHL
Device: /dev/mst/mt41686_pciconf0
Configurations:                          Next Boot         New
        INTERNAL_CPU_MODEL               EMBEDDED_CPU(1)   EMBEDDED_CPU(1)
        NVME_EMULATION_ENABLE            False(0)          True(1)
        VIRTIO_NET_EMULATION_ENABLE      False(0)          True(1)
        VIRTIO_BLK_EMULATION_NUM_PF      0                 1
Apply new Configuration? (y/n) [n] : y
Applying… Done!
-I- Please reboot machine to load new configurations.
root@dpu-213:~#
The parameters below left a Hygon server unable to boot, stuck on a black screen:
PCI_SWITCH_EMULATION_ENABLE=1 PCI_SWITCH_EMULATION_NUM_PORT=32
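If a host no longer boots after a bad setting, the configuration can usually be reverted from the DPU side and the host power-cycled afterwards (a sketch, assuming the same mst device path as above):
mst start
mlxconfig -d /dev/mst/mt41686_pciconf0 reset   # restores firmware configuration defaults; re-apply any parameters you still need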
RoCE DPU
The RoCE DPU configuration is almost identical to the IB DPU; the difference is in network initialization.
The driver download location is the same as above.
Network initialization has to be configured after the DPU boots, based on the interface names actually reported.
Configuration record
root@roce-dpu-100:~# systemctl stop openvswitch-switch.service; systemctl disable openvswitch-switch.service
systemctl start openvswitch-switch.service; systemctl enable openvswitch-switch.service
systemctl stop ufw; systemctl disable ufw
cat > /etc/netplan/60-mlnx.yaml << 'EOF'
network:
  ethernets:
    enp3s0f0s0:
      dhcp4: no
      addresses:
        - 192.168.6.100/24
    enp3s0f1s0:
      dhcp4: no
      addresses:
        - 192.168.8.100/24
  renderer: networkd
  version: 2
EOF
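Apply and check (interface names are from the example above):
netplan apply
ip -br addr show | grep enp3s0f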
Attach storage to the DPU
root@roce-dpu-100:~# nvme discover -t rdma -a 10.10.0.44 -s 4444
Discovery Log Number of Records 1, Generation counter 2
=====Discovery Log Entry 0======
trtype: rdma
adrfam: ipv4
subtype: nvme subsystem
treq: not specified, sq flow control disable supported
portid: 0
trsvcid: 4444
subnqn: t1-nvme5n1
traddr: 10.10.0.44
rdma_prtype: not specified
rdma_qptype: connected
rdma_cms: rdma-cm
rdma_pkey: 0x0000
root@roce-dpu-100:~# spdk_rpc.py bdev_nvme_attach_controller -b nvme0 -t rdma -f ipv4 -a 10.10.0.44 -s 4444 -n t1-nvme5n1
nvme0n1
root@roce-dpu-100:~# snap_rpc.py subsystem_nvme_create --nqn nqn.bluefield.sf.0001 SNAPNVME0001 "Mellanox NVMe SNAP Controller"
{
  "nqn": "nqn.bluefield.sf.0001",
  "subsys_id": 0
}
root@roce-dpu-100:~# ibdev2netdev | grep enp3s0f0s0
mlx5_2 port 1 ==> enp3s0f0s0 (Up)
root@roce-dpu-100:~# snap_rpc.py emulation_functions_list
[
  {
    "emulation_manager": "mlx5_0",
    "hotplugged": false,
    "emulation_type": "nvme",
    "pf_index": 0,
    "VUID": "",
    "pci_bdf": "84:00.2",
    "controller": {},
    "num_vfs": 0
  }
]
root@roce-dpu-100:~# snap_rpc.py controller_nvme_create --pf_id=0 -c /etc/mlnx_snap/mlnx_snap.json --nqn nqn.bluefield.sf.0001 mlx5_0
{
  "name": "NvmeEmu0pf0",
  "cntlid": 0,
  "version": "1.3.0",
  "offload": false,
  "mempool": false,
  "max_nsid": 1024,
  "max_namespaces": 1024
}
root@roce-dpu-100:~# snap_rpc.py controller_nvme_namespace_attach -c NvmeEmu0pf0 spdk nvme0n1 1
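To confirm the namespace is attached, you can list the SNAP controllers; the RPC name below is assumed from the mlnx_snap toolset and may differ between versions:
snap_rpc.py controller_list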
On the host:
[root@node26 ~]# rmmod nvme
[root@node26 ~]# modprobe nvme
[root@node26 ~]#
[root@node26 ~]# nvme list
Node          SN            Model                          Namespace  Usage              Format       FW Rev
/dev/nvme0n1  A065E7B6      WUS4BA118DSP3X1                1          1.80 TB / 1.80 TB  512 B + 0 B  R2109003
/dev/nvme1n1  SNAPNVME0001  Mellanox NVMe SNAP Controller  1          3.84 TB / 3.84 TB  512 B + 0 B  1.0
/dev/nvme2n1  A065DECE      WUS4BA118DSP3X1                1          1.80 TB / 1.80 TB  512 B + 0 B  R2109003