Skip to content

Commit 1b38ccf

Browse files
authored
Merge pull request #11 from sfallah/sf/deepseek-ocr-merge_#17965
Merged with PR ggml-org#17965
2 parents 4cbbe8a + fb3bb6a commit 1b38ccf

File tree

102 files changed

+4459
-3189
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

102 files changed

+4459
-3189
lines changed

.devops/cann.Dockerfile

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44

55
# Define the CANN base image for easier version updates later
66
ARG CHIP_TYPE=910b
7-
ARG CANN_BASE_IMAGE=quay.io/ascend/cann:8.3.rc1.alpha001-${CHIP_TYPE}-openeuler22.03-py3.11
7+
ARG CANN_BASE_IMAGE=quay.io/ascend/cann:8.3.rc2-${CHIP_TYPE}-openeuler24.03-py3.11
88

99
# ==============================================================================
1010
# BUILD STAGE
@@ -111,7 +111,7 @@ ENTRYPOINT ["/app/tools.sh"]
111111
# ==============================================================================
112112
FROM base AS light
113113

114-
COPY --from=build /app/full/llama-cli /app
114+
COPY --from=build /app/full/llama-cli /app/full/llama-completion /app
115115

116116
ENTRYPOINT [ "/app/llama-cli" ]
117117

.devops/cpu.Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@ ENTRYPOINT ["/app/tools.sh"]
6868
### Light, CLI only
6969
FROM base AS light
7070

71-
COPY --from=build /app/full/llama-cli /app
71+
COPY --from=build /app/full/llama-cli /app/full/llama-completion /app
7272

7373
WORKDIR /app
7474

.devops/cuda.Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@ ENTRYPOINT ["/app/tools.sh"]
7474
### Light, CLI only
7575
FROM base AS light
7676

77-
COPY --from=build /app/full/llama-cli /app
77+
COPY --from=build /app/full/llama-cli /app/full/llama-completion /app
7878

7979
WORKDIR /app
8080

.devops/intel.Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,7 @@ ENTRYPOINT ["/app/tools.sh"]
7373
FROM base AS light
7474

7575
COPY --from=build /app/lib/ /app
76-
COPY --from=build /app/full/llama-cli /app
76+
COPY --from=build /app/full/llama-cli /app/full/llama-completion /app
7777

7878
WORKDIR /app
7979

.devops/musa.Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,7 @@ ENTRYPOINT ["/app/tools.sh"]
8181
### Light, CLI only
8282
FROM base AS light
8383

84-
COPY --from=build /app/full/llama-cli /app
84+
COPY --from=build /app/full/llama-cli /app/full/llama-completion /app
8585

8686
WORKDIR /app
8787

.devops/rocm.Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -94,7 +94,7 @@ ENTRYPOINT ["/app/tools.sh"]
9494
### Light, CLI only
9595
FROM base AS light
9696

97-
COPY --from=build /app/full/llama-cli /app
97+
COPY --from=build /app/full/llama-cli /app/full/llama-completion /app
9898

9999
WORKDIR /app
100100

.devops/s390x.Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,7 @@ WORKDIR /llama.cpp/bin
105105

106106
# Copy llama.cpp binaries and libraries
107107
COPY --from=collector /llama.cpp/bin/*.so /llama.cpp/bin
108-
COPY --from=collector /llama.cpp/bin/llama-cli /llama.cpp/bin
108+
COPY --from=collector /llama.cpp/bin/llama-cli /llama.cpp/bin/llama-completion /llama.cpp/bin
109109

110110
ENTRYPOINT [ "/llama.cpp/bin/llama-cli" ]
111111

.devops/tools.sh

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@ elif [[ "$arg1" == '--quantize' || "$arg1" == '-q' ]]; then
1313
exec ./llama-quantize "$@"
1414
elif [[ "$arg1" == '--run' || "$arg1" == '-r' ]]; then
1515
exec ./llama-cli "$@"
16+
elif [[ "$arg1" == '--run-legacy' || "$arg1" == '-l' ]]; then
17+
exec ./llama-completion "$@"
1618
elif [[ "$arg1" == '--bench' || "$arg1" == '-b' ]]; then
1719
exec ./llama-bench "$@"
1820
elif [[ "$arg1" == '--perplexity' || "$arg1" == '-p' ]]; then
@@ -32,8 +34,10 @@ elif [[ "$arg1" == '--server' || "$arg1" == '-s' ]]; then
3234
else
3335
echo "Unknown command: $arg1"
3436
echo "Available commands: "
35-
echo " --run (-r): Run a model previously converted into ggml"
36-
echo " ex: -m /models/7B/ggml-model-q4_0.bin -p \"Building a website can be done in 10 simple steps:\" -n 512"
37+
echo " --run (-r): Run a model (chat) previously converted into ggml"
38+
echo " ex: -m /models/7B/ggml-model-q4_0.bin"
39+
echo " --run-legacy (-l): Run a model (legacy completion) previously converted into ggml"
40+
echo " ex: -m /models/7B/ggml-model-q4_0.bin -no-cnv -p \"Building a website can be done in 10 simple steps:\" -n 512"
3741
echo " --bench (-b): Benchmark the performance of the inference for various parameters."
3842
echo " ex: -m model.gguf"
3943
echo " --perplexity (-p): Measure the perplexity of a model over a given text."

.devops/vulkan.Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@ ENTRYPOINT ["/app/tools.sh"]
6868
### Light, CLI only
6969
FROM base AS light
7070

71-
COPY --from=build /app/full/llama-cli /app
71+
COPY --from=build /app/full/llama-cli /app/full/llama-completion /app
7272

7373
WORKDIR /app
7474

.github/workflows/build.yml

Lines changed: 41 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1400,25 +1400,54 @@ jobs:
14001400
chip_type: ['910b', '310p']
14011401
build: ['Release']
14021402
runs-on: ${{ matrix.arch == 'aarch64' && 'ubuntu-24.04-arm' || 'ubuntu-24.04' }}
1403-
container: ascendai/cann:${{ matrix.chip_type == '910b' && '8.3.rc1.alpha001-910b-openeuler22.03-py3.11' || '8.2.rc1-310p-openeuler22.03-py3.11' }}
14041403
steps:
14051404
- name: Checkout
14061405
uses: actions/checkout@v4
1406+
with:
1407+
fetch-depth: 0
14071408

1408-
- name: Dependencies
1409-
run: |
1410-
yum update -y
1411-
yum install -y git gcc gcc-c++ make cmake libcurl-devel
1409+
- name: Free up disk space
1410+
uses: ggml-org/free-disk-space@v1.3.1
1411+
with:
1412+
tool-cache: true
14121413

1413-
- name: Build
1414+
- name: Set container image
1415+
id: cann-image
14141416
run: |
1415-
export LD_LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/lib64:${ASCEND_TOOLKIT_HOME}/$(uname -m)-linux/devlib/:${LD_LIBRARY_PATH}
1417+
image="ascendai/cann:${{ matrix.chip_type == '910b' && '8.3.rc2-910b-openeuler24.03-py3.11' || '8.3.rc2-310p-openeuler24.03-py3.11' }}"
1418+
echo "image=${image}" >> "${GITHUB_OUTPUT}"
14161419
1417-
cmake -S . -B build \
1418-
-DCMAKE_BUILD_TYPE=${{ matrix.build }} \
1419-
-DGGML_CANN=on \
1420-
-DSOC_TYPE=ascend${{ matrix.chip_type }}
1421-
cmake --build build -j $(nproc)
1420+
- name: Pull container image
1421+
run: docker pull "${{ steps.cann-image.outputs.image }}"
1422+
1423+
- name: Build
1424+
env:
1425+
BUILD_TYPE: ${{ matrix.build }}
1426+
SOC_TYPE: ascend${{ matrix.chip_type }}
1427+
run: |
1428+
HOST_UID=$(id -u)
1429+
HOST_GID=$(id -g)
1430+
1431+
docker run --rm \
1432+
-v "${PWD}:/workspace" \
1433+
-w /workspace \
1434+
-e SOC_TYPE=${SOC_TYPE} \
1435+
-e BUILD_TYPE=${BUILD_TYPE} \
1436+
"${{ steps.cann-image.outputs.image }}" \
1437+
bash -lc '
1438+
set -e
1439+
yum install -y --setopt=install_weak_deps=False --setopt=tsflags=nodocs git gcc gcc-c++ make cmake libcurl-devel
1440+
yum clean all && rm -rf /var/cache/yum
1441+
git config --global --add safe.directory "/workspace"
1442+
export LD_LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/lib64:${ASCEND_TOOLKIT_HOME}/$(uname -m)-linux/devlib/:${LD_LIBRARY_PATH}
1443+
cmake -S . -B build \
1444+
-DCMAKE_BUILD_TYPE=${BUILD_TYPE} \
1445+
-DGGML_CANN=on \
1446+
-DSOC_TYPE=${SOC_TYPE}
1447+
cmake --build build -j $(nproc)
1448+
1449+
chown -R '"${HOST_UID}"':'"${HOST_GID}"' /workspace/build
1450+
'
14221451
14231452
# TODO: simplify the following workflows using a matrix
14241453
# TODO: run lighter CI on PRs and the full CI only on master (if needed)

0 commit comments

Comments
 (0)