Do it all in one script

# CUDA 11.8 toolkit
echo y | conda install cuda=11.8.0 -c nvidia
# PyTorch wheels built against CUDA 11.8
pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
pip install liger-kernel
pip install flash-attn deepspeed transformers --extra-index-url https://download.pytorch.org/whl/cu118
# confirm nvcc is visible before building anything from source
nvcc --version
pip install vllm --extra-index-url https://download.pytorch.org/whl/cu118
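
Quick sanity check after the one-shot script — a minimal sketch that just imports each package and prints its version:
---
# Sanity check: import every installed package and print its version.
import torch, flash_attn, deepspeed, transformers, vllm

print("torch       ", torch.__version__, "| CUDA", torch.version.cuda)
print("flash-attn  ", flash_attn.__version__)
print("deepspeed   ", deepspeed.__version__)
print("transformers", transformers.__version__)
print("vllm        ", vllm.__version__)
print("cuda available:", torch.cuda.is_available())
---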

cuda

# pick one toolkit version
echo y | conda install cuda=11.8.0 -c nvidia
echo y | conda install cuda=12.2.0 -c nvidia
# conda route for PyTorch 2.1.2 built against CUDA 11.8
echo y | conda install pytorch==2.1.2 torchvision==0.16.2 torchaudio==2.1.2 pytorch-cuda=11.8 -c pytorch -c nvidia

torch

Previous PyTorch Versions | PyTorch: https://pytorch.org/get-started/previous-versions/

pip uninstall -y torch torchvision torchaudio

# cuda 12.1
pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121

# cuda 11.8
pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
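
To confirm the wheel actually matches the toolkit, a minimal check (bf16 needs Ampere or newer):
---
import torch

print(torch.__version__)               # e.g. 2.1.2+cu118
print(torch.version.cuda)              # CUDA version the wheel was built against
print(torch.cuda.is_available())       # True if the driver/toolkit setup works
print(torch.cuda.is_bf16_supported())  # bf16 requires Ampere (A100) or newer
---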

flash attn

For CUDA 11, we only compile for CUDA 11.8, and for CUDA 12 we only compile for CUDA 12.2

Prefer 11.8 where possible.

git clone git@github.com:Dao-AILab/flash-attention.git
cd flash-attention
git checkout tags/v2.3.6
python setup.py install

# On A100 you can additionally build the fused layer_norm kernel
cd csrc/layer_norm && pip install .
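
A short smoke test that the compiled kernel runs on the GPU — a minimal sketch; the tensor shapes here are arbitrary, flash-attn expects fp16/bf16 inputs shaped (batch, seqlen, nheads, headdim):
---
import torch
from flash_attn import flash_attn_func

# arbitrary shapes: batch=2, seqlen=128, 8 heads, head dim 64
q = torch.randn(2, 128, 8, 64, dtype=torch.float16, device="cuda")
k = torch.randn(2, 128, 8, 64, dtype=torch.float16, device="cuda")
v = torch.randn(2, 128, 8, 64, dtype=torch.float16, device="cuda")

out = flash_attn_func(q, k, v, causal=True)
print(out.shape)  # torch.Size([2, 128, 8, 64])
---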

deepspeed

pip install deepspeed
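
If you train through the HF Trainer, a minimal ZeRO-2 config sketch (field names follow the DeepSpeed config schema; the "auto" values are filled in by the Trainer when you pass the file via --deepspeed):
---
import json

# Minimal ZeRO-2 config sketch; adjust to your own training setup.
ds_config = {
    "train_micro_batch_size_per_gpu": "auto",
    "gradient_accumulation_steps": "auto",
    "bf16": {"enabled": True},
    "zero_optimization": {
        "stage": 2,
        "overlap_comm": True,
        "contiguous_gradients": True,
    },
}

with open("ds_zero2.json", "w") as f:
    json.dump(ds_config, f, indent=2)
---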

transformers

git clone git@github.com:huggingface/transformers.git
cd transformers
git checkout tags/v4.44.2
pip install -e .
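
To check that transformers picks up the flash-attn build, a minimal loading sketch — the local path reuses the one from the vllm section below and is only an example:
---
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "LLMs/Meta-Llama-3-8B-Instruct"  # example local path, same as the vllm section
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.bfloat16,
    attn_implementation="flash_attention_2",  # requires the flash-attn build above
).to("cuda")
print(model.dtype, model.device)
---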

vllm

pip install vllm
vllm serve LLMs/Meta-Llama-3-8B-Instruct --dtype bfloat16 --api-key hello --port 8999
---
from openai import OpenAI
client = OpenAI(api_key="hello", base_url="http://127.0.0.1:8999/v1")
client.models.list()
---
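
A follow-up request against the same server — a sketch; the model name is just the path that was passed to vllm serve:
---
# Chat completion against the local vllm server started above.
resp = client.chat.completions.create(
    model="LLMs/Meta-Llama-3-8B-Instruct",
    messages=[{"role": "user", "content": "Hello, who are you?"}],
    max_tokens=64,
)
print(resp.choices[0].message.content)
---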

# Install vLLM from source
git clone https://github.com/vllm-project/vllm.git
cd vllm
pip install -e . --extra-index-url https://download.pytorch.org/whl/cu121
# if the build errors, make sure nvcc is on PATH and CUDA_HOME points to the right toolkit
export CUDA_HOME=/usr/local/cuda
export PATH="${CUDA_HOME}/bin:$PATH"
nvcc --version # verify that nvcc is in your PATH
${CUDA_HOME}/bin/nvcc --version # verify that nvcc is in your CUDA_HOME

# Install vLLM with CUDA 11.8.
export VLLM_VERSION=0.4.0
export PYTHON_VERSION=310
pip install https://github.com/vllm-project/vllm/releases/download/v${VLLM_VERSION}/vllm-${VLLM_VERSION}+cu118-cp${PYTHON_VERSION}-cp${PYTHON_VERSION}-manylinux1_x86_64.whl --extra-index-url https://download.pytorch.org/whl/cu118
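
For offline use without a server, vllm's Python API works too — a minimal sketch reusing the same local model path:
---
from vllm import LLM, SamplingParams

llm = LLM(model="LLMs/Meta-Llama-3-8B-Instruct", dtype="bfloat16")
params = SamplingParams(temperature=0.7, max_tokens=64)

outputs = llm.generate(["Hello, who are you?"], params)
print(outputs[0].outputs[0].text)
---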