赞
踩
尝试在算能云riscv环境里编译飞桨。
先总结操作步骤:
下载飞桨代码GitHub - PaddlePaddle/Paddle: PArallel Distributed Deep LEarning: Machine Learning Framework from Industrial Practice (『飞桨』核心框架,深度学习&机器学习高性能单机、分布式训练和跨平台部署):git clone https://github.com/paddlepaddle/paddle
参照PR「Adding a compile option to Paddle for Risc-V · PaddlePaddle/Paddle@d3db383 · GitHub」修改代码
然后编译
- cmake ../ -DWITH_GPU=OFF -DWITH_RISCV=ON
- make -j 128 TARGET=RISCV64_GENERIC
(注意以上两句最好分开执行,以便发现cmake是否报错,尤其是修改了cmake 配置文件之后。否则可能没生效而重复编译)
编译好后安装:
pip install paddlepaddle-0.0.0-cp38-cp38-linux_riscv64.whl -i https://mirror.baidu.com/pypi/simple
最后一步:注册libpaddle.so ,使用命令:patchelf --add-needed libatomic.so.1 /usr/local/lib/python3.8/dist-packages/paddle/base/libpaddle.so
这样大家就可以愉快地使用飞桨在RISCV环境下进行AI机器学习啦!
git clone https://github.com/paddlepaddle/paddle
pip3 install protobuf
还有一些库等,具体可以参考编译pytorch的文档:算能RISC-V通用云开发空间编译pytorch @openKylin留档-CSDN博客
mkdir build
cd build
cmake ../
我的天,除了protobuf,竟然一下子编译完成了!
- Automatic code generation for paddle/fluid/primitive succeed.
- Automatic code generation for decomp interface succeed.
- WITH_DLNNE:
- -- Configuring done
- -- Generating done
- -- Build files have been written to: /root/github/paddle/build
是我肤浅了,后面还需要编译呢:root@863c89a419ec:~/github/paddle/build# make
make -j 16
make -j 8 TARGET=RISCV64_GENERIC -dw
如果碰到报错,根据报错进行处理,比如哪个第三方库编译失败,就删除那个目录,然后在third_party目录执行:git submodule update --init --recursive
然后再make 即可。
困了,让它编译去吧。
报错了
通过官网issue,发现cmake指令为:
cmake ../ -DWITH_GPU=OFF -WITH_RISCV=ON
make 指令为:
make -j 16 TARGET=RISCV64_GENERIC
拼写错误,应该是:
cmake ../ -DWITH_GPU=OFF -DWITH_RISCV=ON
然后再make
看MakeFile文件里面没有RISCV选项,不知道是不是cmake编译器那边支持,先执行看看。(后来知道,人家这个RISCV是对应了专门的pr的,目前这个pr还没有合并,所以需要手工改代码,具体修改方法如下)
cmake结束之后提示WITH_RISCV没有生效
修改CMakeLists.txt文件,在WITH_ARM的后面加上下面内容:
- if(WITH_RISCV)
- set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fPIC")
- set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC")
- set(WITH_XBYAK
- OFF
- CACHE STRING "Disable XBYAK when compiling WITH_RISCV=ON." FORCE)
- set(WITH_MKL
- OFF
- CACHE STRING "Disable MKL when compiling WITH_RISCV=ON." FORCE)
- set(WITH_AVX
- OFF
- CACHE STRING "Disable AVX when compiling WITH_AVX=OFF." FORCE)
- add_definitions(-DPADDLE_WITH_RISCV)
- endif()
在ARM后面加上:AND NOT WITH_RISCV
同时找到了-m64的位置。因为加了这句,就不用手工删除-m64这句。
一共需要修改四处,在四处加上defined(PADDLE_WITH_RISCV):
- #pragma once
-
- #if !defined(PADDLE_WITH_ARM) && !defined(PADDLE_WITH_SW) && \
- !defined(PADDLE_WITH_MIPS) && !defined(PADDLE_WITH_LOONGARCH) && \
- !defined(PADDLE_WITH_RISCV)
- #include <immintrin.h>
- #endif
- #include <cfloat>
- @@ -103,7 +104,8 @@ void call_gemm_batched(const framework::ExecutionContext& ctx,
- }
-
- #if !defined(PADDLE_WITH_ARM) && !defined(PADDLE_WITH_SW) && \
- !defined(PADDLE_WITH_MIPS) && !defined(PADDLE_WITH_LOONGARCH) && \
- !defined(PADDLE_WITH_RISCV)
-
- #define __m256x __m256
-
- @@ -144,7 +146,8 @@ inline void axpy(const T* x, T* y, size_t len, const T alpha) {
- _mm256_mul_px(mm_alpha, _mm256_load_px(x + jjj))));
- }
- #elif defined(PADDLE_WITH_ARM) || defined(PADDLE_WITH_SW) || \
- defined(PADDLE_WITH_MIPS) || defined(PADDLE_WITH_LOONGARCH) || \
- defined(PADDLE_WITH_RISCV)
- PADDLE_THROW(platform::errors::Unimplemented("axpy is not supported"));
- #else
- lll = len & ~SSE_CUT_LEN_MASK;
- @@ -174,7 +177,8 @@ inline void axpy_noadd(const T* x, T* y, size_t len, const T alpha) {
- _mm256_store_px(y + jjj, _mm256_mul_px(mm_alpha, _mm256_load_px(x + jjj)));
- }
- #elif defined(PADDLE_WITH_ARM) || defined(PADDLE_WITH_SW) || \
- defined(PADDLE_WITH_MIPS) || defined(PADDLE_WITH_LOONGARCH) || \
- defined(PADDLE_WITH_RISCV)
- PADDLE_THROW(platform::errors::Unimplemented("axpy_noadd is not supported"));
- #else
- lll = len & ~SSE_CUT_LEN_MASK;
33行if那句后面加上RISCV,变成这样:
- #if !defined(GCC_WITHOUT_INTRINSICS) && !defined(PADDLE_WITH_ARM) && \
- !defined(PADDLE_WITH_SW) && !defined(PADDLE_WITH_MIPS) && \
- !defined(_WIN32) && !defined(PADDLE_WITH_LOONGARCH) && \
- !defined(PADDLE_WITH_RISCV)
- #define DENORM_USE_INTRINSICS
- } else {
- #if !defined(WITH_NV_JETSON) && !defined(PADDLE_WITH_ARM) && \
- !defined(PADDLE_WITH_SW) && !defined(PADDLE_WITH_MIPS) && \
- !defined(PADDLE_WITH_LOONGARCH) && !defined(PADDLE_WITH_RISCV)
- std::array<int, 4> reg;
- cpuid(reg.data(), 0);
- int nIds = reg[0];
- #define cpuid(reg, x) __cpuidex(reg, x, 0)
- #else
- #if !defined(WITH_NV_JETSON) && !defined(PADDLE_WITH_ARM) && \
- !defined(PADDLE_WITH_SW) && !defined(PADDLE_WITH_MIPS) && \
- !defined(PADDLE_WITH_LOONGARCH) && !defined(PADDLE_WITH_RISCV)
- #include <cpuid.h>
- inline void cpuid(int reg[4], int x) {
感觉离曙光很近了。
重新cmake和make,希望不会碰到“pmmintrin.h”文件找不到的错误。
发现少修改一个文件
- if(WITH_POCKETFFT)
- include(external/pocketfft)
- list(APPEND third_party_deps extern_pocketfft)
- add_definitions(-DPADDLE_WITH_POCKETFFT)
- if(WITH_RISCV)
- set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-maybe-uninitialized") # Warnings in pocketfft_hdronly.h
- endif()
- endif()
11点开始编译,计时开始!15:37分编译完成,耗时4小时37分钟!
哈哈,它静静的躺在那里:
- root@863c89a419ec:~/github/paddle/build/python# ls dist/
- paddlepaddle-0.0.0-cp38-cp38-linux_riscv64.whl
- root@863c89a419ec:~/github/paddle/build/python# pip3 install dist/paddlepaddle-0.0.0-cp38-cp38-linux_riscv64.whl -i https://mirror.baidu.com/pypi/simple
- Processing ./dist/paddlepaddle-0.0.0-cp38-cp38-linux_riscv64.whl
安装:
pip install paddlepaddle-0.0.0-cp38-cp38-linux_riscv64.whl
安装后执行import paddle 报错:ImportError: /usr/local/lib/python3.8/dist-packages/paddle/base/libpaddle.so: undefined symbol: __atomic_exchange_1
按照这个issue 算能云RISCV环境编译后import paddle报错 · Issue #62037 · PaddlePaddle/Paddle · GitHub里面修改,还是报错
将build文件夹删除,重新建立,重新cmake 和make,并把-j改成128
问题依旧
突然想到pytorch安装的时候也碰到这个报错,于是找到了解决方法:
只需要用patchelf注册一下就行了`patchelf --add-needed libatomic.so.1 /usr/local/lib/python3.8/dist-packages/paddle/base/libpaddle.so `
如果没有patchelf,apt install patchelf安装即可。
终于飞桨可以用了:
- root@863c89a419ec:~# python3
- Python 3.8.2 (default, Jan 18 2024, 07:05:37)
- [GCC 9.3.0] on linux
- Type "help", "copyright", "credits" or "license" for more information.
- >>> import paddle
- >>> paddle.utils.run_check()
- Running verify PaddlePaddle program ...
- I0228 07:37:38.344522 279426 program_interpreter.cc:220] New Executor is Running.
- I0228 07:37:38.475822 279426 interpreter_util.cc:652] Standalone Executor is Used.
- PaddlePaddle works well on 1 CPU.
- PaddlePaddle is installed successfully! Let's start deep learning with PaddlePaddle now.
目前已编译成功。
目前import paddle成功。run_check()成功。飞桨成功编译好了,可以开始使用了!
- >>> import paddle
- >>> paddle.utils.run_check()
- Running verify PaddlePaddle program ...
- I0228 07:37:38.344522 279426 program_interpreter.cc:220] New Executor is Running.
- I0228 07:37:38.475822 279426 interpreter_util.cc:652] Standalone Executor is Used.
- PaddlePaddle works well on 1 CPU.
- PaddlePaddle is installed successfully! Let's start deep learning with PaddlePaddle now.
- >>> x = paddle.randn((2,3))
- >>> y = paddle.randn((2,3))
- >>> z = x+y
- >>> z
- Tensor(shape=[2, 3], dtype=float32, place=Place(cpu), stop_gradient=True,
- [[ 0.70946050, -0.07831943, -0.60504013],
- [-2.55267453, -0.59097183, 2.08291411]])
测试飞桨单机实例,pass。参见:第五十二回 戴宗二取公孙胜 李逵独劈罗真人-飞桨AI框架安装和使用示例-CSDN博客
Could NOT find PY_google.protobuf (missing: PY_GOOGLE.PROTOBUF)
安装protobuf
pip3 install protobuf
[ 97%] Building CXX object gloo/CMakeFiles/gloo.dir/transport/tcp/unbound_buffer.cc.o
[100%] Linking CXX static library libgloo.a
[100%] Built target gloo
[ 3%] Performing install step for 'extern_gloo'
[ 3%] Completed 'extern_gloo'
[ 3%] Built target extern_gloo
make: *** [Makefile:136: all] Error 2
再重新编译,到了protobuf这里报错:
-- Installing: /root/github/paddle/build/third_party/install/protobuf/lib/cmake/protobuf/protobuf-config.cmake
[ 4%] Completed 'extern_protobuf'
[ 4%] Built target extern_protobuf
make: *** [Makefile:136: all] Error 2
再重新编译,发现这里报错:
cc1: error: requested ABI requires '-march' to subsume the 'D' extension
cc1: error: ABI requires '-march=rv64'
make[4]: *** [Makefile:737: isamin.o] Error 1
cc1: error: '-march=loongson3a': ISA string must begin with rv32 or rv64
make[4]: *** [Makefile:611: sasum.o] Error 1
cc1: error: '-march=loongson3a': ISA string must begin with rv32 or rv64
cc1: error: '-march=loongson3a': ISA string must begin with rv32 or rv64
cc1: error: requested ABI requires '-march' to subsume the 'D' extension
cc1: error: requested ABI requires '-march' to subsume the 'D' extension
cc1: error: ABI requires '-march=rv64'
cc1: error: ABI requires '-march=rv64'
cc1: error: requested ABI requires '-march' to subsume the 'D' extension
cc1: error: ABI requires '-march=rv64'
make[4]: *** [Makefile:647: snrm2.o] Error 1
make[4]: *** [Makefile:629: ssum.o] Error 1
make[4]: *** [Makefile:701: smax.o] Error 1
make[4]: *** [Makefile:665: samax.o] Error 1
make[4]: *** [Makefile:755: ismax.o] Error 1
make[4]: *** [Makefile:773: sdsdot.o] Error 1
make[3]: *** [Makefile:164: libs] Error 1
make[2]: *** [CMakeFiles/extern_openblas.dir/build.make:86: third_party/openblas/src/extern_openblas-stamp/extern_openblas-build] Error 2
make[1]: *** [CMakeFiles/Makefile2:4001: CMakeFiles/extern_openblas.dir/all] Error 2
[ 5%] Built target eager_python_c_codegen
[ 5%] Built target op_map_codegen
[ 5%] Built target eager_codegen
make: *** [Makefile:136: all] Error 2
参考这里重新来过:
Live child 0x2af978fd70 (paddle/fluid/eager/auto_code_generator/generator/CMakeFiles/eager_codegen) PID 115530
Reaping winning child 0x2af978fd70 PID 115530
Removing child 0x2af978fd70 PID 115530 from chain.
Considering target file 'paddle/fluid/eager/auto_code_generator/generator/CMakeFiles/eager_codegen.dir/build'.
File 'paddle/fluid/eager/auto_code_generator/generator/CMakeFiles/eager_codegen.dir/build' does not exist.
Considering target file 'eager_codegen'.
File 'eager_codegen' does not exist.
Considering target file 'paddle/fluid/eager/auto_code_generator/generator/CMakeFiles/eager_codegen'.
File 'paddle/fluid/eager/auto_code_generator/generator/CMakeFiles/eager_codegen' was considered already.
Considering target file 'paddle/fluid/eager/auto_code_generator/generator/CMakeFiles/eager_codegen.dir/build.make'.
File 'paddle/fluid/eager/auto_code_generator/generator/CMakeFiles/eager_codegen.dir/build.make' was considered already.
Finished prerequisites of target file 'eager_codegen'.
Must remake target 'eager_codegen'.
Successfully remade target file 'eager_codegen'.
Finished prerequisites of target file 'paddle/fluid/eager/auto_code_generator/generator/CMakeFiles/eager_codegen.dir/build'.
Must remake target 'paddle/fluid/eager/auto_code_generator/generator/CMakeFiles/eager_codegen.dir/build'.
Successfully remade target file 'paddle/fluid/eager/auto_code_generator/generator/CMakeFiles/eager_codegen.dir/build'.
make[2]: Leaving directory '/root/github/paddle/build'
Reaping winning child 0x2b00eb0510 PID 115266
Live child 0x2b00eb0510 (paddle/fluid/eager/auto_code_generator/generator/CMakeFiles/eager_codegen.dir/all) PID 115532
[ 4%] Built target eager_codegen
Reaping winning child 0x2b00eb0510 PID 115532
Removing child 0x2b00eb0510 PID 115532 from chain.
make[1]: Leaving directory '/root/github/paddle/build'
Reaping losing child 0x2ad05e7c50 PID 115118
make: *** [Makefile:136: all] Error 2
Removing child 0x2ad05e7c50 PID 115118 from chain.
make: Leaving directory '/root/github/paddle/build'
在paddle/build/third_party删除eigen3目录,然后执行:git submodule update --init --recursive
问题解决。
问题没有解决,还是4%这里报错:
[ 4%] Built target eager_codegen
Reaping winning child 0x2b167efa70 PID 117670
Removing child 0x2b167efa70 PID 117670 from chain.
make[1]: Leaving directory '/root/github/paddle/build'
Reaping losing child 0x2b0332dc20 PID 117206
make: *** [Makefile:136: all] Error 2
Removing child 0x2b0332dc20 PID 117206 from chain.
make: Leaving directory '/root/github/paddle/build'
重新设置make 后,cmake ../ -DWITH_GPU=OFF -WITH_RISCV=ON 编译报错(注意:这里的-WITH_RISCV少了一个D,正确写法是-DWITH_RISCV=ON)
按照文档,应该“查找makefile文件的生成逻辑,最终发现在Paddle.cmake文件中有这样一段逻辑”,将该文件中的-m64去掉。
但是没有找到这个文件。找到了,在这里:
cmake/flags.cmake
if(NOT WITH_NV_JETSON
AND NOT WITH_ARM
AND NOT WITH_RISCV
AND NOT WITH_SW
AND NOT WITH_MIPS
AND NOT WITH_LOONGARCH)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -m64")
endif()
后期通过修改源码,添加RISCV选项的方法,使代码支持RISCV,并同时使-m64失效
WITH_DLNNE:
-- Configuring done
-- Generating done
CMake Warning:
Manually-specified variables were not used by the project:
WITH_RISCV
按照飞桨官网pr,修改飞桨make相关文件。
[ 7%] Building CXX object paddle/fluid/platform/CMakeFiles/denormal.dir/denormal.cc.o
/root/github/paddle/paddle/fluid/platform/denormal.cc:38:10: fatal error: pmmintrin.h: No such file or directory
38 | #include <pmmintrin.h>
| ^~~~~~~~~~~~~
compilation terminated.
make[2]: *** [paddle/fluid/platform/CMakeFiles/denormal.dir/build.make:76: paddle/fluid/platform/CMakeFiles/denormal.dir/denormal.cc.o] Error 1
make[1]: *** [CMakeFiles/Makefile2:4787: paddle/fluid/platform/CMakeFiles/denormal.dir/all] Error 2
make[1]: *** Waiting for unfinished jobs....
这个对应的就是paddle/fluid/platform/denormal.cc 这个文件,已经修改。
编译安装好后,
root@863c89a419ec:~/github/paddle/build# python3
Python 3.8.2 (default, Jan 18 2024, 07:05:37)
[GCC 9.3.0] on linux
Type "help", "copyright", "credits" or "license" for more information.
>>> import paddle
Error: Can not import paddle core while this file exists: /usr/local/lib/python3.8/dist-packages/paddle/base/libpaddle.so
Traceback (most recent call last):
File "/usr/local/lib/python3.8/dist-packages/paddle/base/core.py", line 267, in <module>
from . import libpaddle
ImportError: /usr/local/lib/python3.8/dist-packages/paddle/base/libpaddle.so: undefined symbol: __atomic_exchange_1
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/usr/local/lib/python3.8/dist-packages/paddle/__init__.py", line 30, in <module>
from .base import core # noqa: F401
File "/usr/local/lib/python3.8/dist-packages/paddle/base/__init__.py", line 38, in <module>
from . import ( # noqa: F401
File "/usr/local/lib/python3.8/dist-packages/paddle/base/backward.py", line 25, in <module>
from . import core, framework, log_helper, unique_name
File "/usr/local/lib/python3.8/dist-packages/paddle/base/core.py", line 377, in <module>
if not avx_supported() and libpaddle.is_compiled_with_avx():
NameError: name 'libpaddle' is not defined
看文档,有人说是patchelf版本低的缘故,升级patchelf,再重新编译,问题依旧
https://github.com/PaddlePaddle/Paddle/issues/51536
已提issue :https://github.com/PaddlePaddle/Paddle/issues/62037
目前还未解决报错问题
只需要用patchelf注册一下就行了:patchelf --add-needed libatomic.so.1 /usr/local/lib/python3.8/dist-packages/paddle/base/libpaddle.so
如果没有patchelf,apt install patchelf安装即可。
发现算能云是128核,所以修改参数-j 128
risc-v芯片上编译paddle报错 · Issue #61770 · PaddlePaddle/Paddle · GitHub
Adding a compile option to Paddle for Risc-V · PaddlePaddle/Paddle@d3db383 · GitHub
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。