
Commit 292363e

Merge branch 'master' of https://github.com/ggerganov/llama.cpp into pull-3417

2 parents 7d6a24a + 95bd60a


70 files changed: +7631 -1836 lines

.dockerignore

+3

@@ -1,6 +1,9 @@
 *.o
 *.a
 .cache/
+.git/
+.github/
+.gitignore
 .vs/
 .vscode/
 .DS_Store
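
Excluding .git/, .github/, and .gitignore keeps repository metadata out of the Docker build context. A quick way to see the effect is the context size Docker reports at the start of a build (the image tag below is illustrative):

    docker build -t llama.cpp:local .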

.github/workflows/build.yml

+33 -10

@@ -10,10 +10,10 @@ on:
   push:
     branches:
       - master
-    paths: ['.github/workflows/**', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu']
+    paths: ['.github/workflows/**', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.swift', '**/*.m']
   pull_request:
     types: [opened, synchronize, reopened]
-    paths: ['**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu']
+    paths: ['**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.swift', '**/*.m']

 env:
   BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
@@ -188,7 +188,7 @@ jobs:
         sysctl -a
         mkdir build
         cd build
-        cmake -DLLAMA_AVX2=OFF -DLLAMA_FMA=OFF ..
+        cmake ..
         cmake --build . --config Release -j $(sysctl -n hw.logicalcpu)

     - name: Test
@@ -253,6 +253,29 @@ jobs:
           -DCMAKE_OSX_DEPLOYMENT_TARGET=14.0
         cmake --build . --config Release -j $(sysctl -n hw.logicalcpu)

+  macOS-latest-swift:
+    runs-on: macos-latest
+
+    strategy:
+      matrix:
+        destination: ['generic/platform=macOS', 'generic/platform=iOS', 'generic/platform=tvOS']
+
+    steps:
+      - name: Clone
+        id: checkout
+        uses: actions/checkout@v1
+
+      - name: Dependencies
+        id: depends
+        continue-on-error: true
+        run: |
+          brew update
+
+      - name: xcodebuild for swift package
+        id: xcodebuild
+        run: |
+          xcodebuild -scheme llama -destination "${{ matrix.destination }}"
+
   windows-latest-cmake:
     runs-on: windows-latest

@@ -265,17 +288,17 @@ jobs:
       matrix:
         include:
           - build: 'noavx'
-            defines: '-DLLAMA_BUILD_SERVER=ON -DLLAMA_AVX=OFF -DLLAMA_AVX2=OFF -DLLAMA_FMA=OFF -DBUILD_SHARED_LIBS=ON'
+            defines: '-DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_AVX=OFF -DLLAMA_AVX2=OFF -DLLAMA_FMA=OFF -DBUILD_SHARED_LIBS=ON'
           - build: 'avx2'
-            defines: '-DLLAMA_BUILD_SERVER=ON -DBUILD_SHARED_LIBS=ON'
+            defines: '-DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DBUILD_SHARED_LIBS=ON'
           - build: 'avx'
-            defines: '-DLLAMA_BUILD_SERVER=ON -DLLAMA_AVX2=OFF -DBUILD_SHARED_LIBS=ON'
+            defines: '-DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_AVX2=OFF -DBUILD_SHARED_LIBS=ON'
           - build: 'avx512'
-            defines: '-DLLAMA_BUILD_SERVER=ON -DLLAMA_AVX512=ON -DBUILD_SHARED_LIBS=ON'
+            defines: '-DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_AVX512=ON -DBUILD_SHARED_LIBS=ON'
           - build: 'clblast'
-            defines: '-DLLAMA_BUILD_SERVER=ON -DLLAMA_CLBLAST=ON -DBUILD_SHARED_LIBS=ON -DCMAKE_PREFIX_PATH="$env:RUNNER_TEMP/clblast"'
+            defines: '-DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_CLBLAST=ON -DBUILD_SHARED_LIBS=ON -DCMAKE_PREFIX_PATH="$env:RUNNER_TEMP/clblast"'
           - build: 'openblas'
-            defines: '-DLLAMA_BUILD_SERVER=ON -DLLAMA_BLAS=ON -DBUILD_SHARED_LIBS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS -DBLAS_INCLUDE_DIRS="$env:RUNNER_TEMP/openblas/include" -DBLAS_LIBRARIES="$env:RUNNER_TEMP/openblas/lib/openblas.lib"'
+            defines: '-DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_BLAS=ON -DBUILD_SHARED_LIBS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS -DBLAS_INCLUDE_DIRS="$env:RUNNER_TEMP/openblas/include" -DBLAS_LIBRARIES="$env:RUNNER_TEMP/openblas/lib/openblas.lib"'

     steps:
       - name: Clone
@@ -414,7 +437,7 @@ jobs:
       run: |
         mkdir build
         cd build
-        cmake .. -DLLAMA_BUILD_SERVER=ON -DLLAMA_CUBLAS=ON -DBUILD_SHARED_LIBS=ON
+        cmake .. -DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_CUBLAS=ON -DBUILD_SHARED_LIBS=ON
         cmake --build . --config Release -j ${env:NUMBER_OF_PROCESSORS}

     - name: Determine tag name
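
Every Windows configure line now passes -DLLAMA_NATIVE=OFF because LLAMA_NATIVE defaults to ON in CMakeLists.txt after this merge, and CI artifacts should not be tuned to whichever CPU the runner happens to have; the macOS job can conversely drop its hand-picked AVX flags and rely on the new default. A local build meant for other machines would follow the same pattern (the instruction-set flags below are illustrative):

    mkdir build && cd build
    cmake .. -DLLAMA_NATIVE=OFF -DLLAMA_AVX=ON -DLLAMA_AVX2=OFF -DLLAMA_FMA=OFF
    cmake --build . --config Release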

.github/workflows/gguf-publish.yml

+2 -1

@@ -36,8 +36,9 @@ jobs:
           poetry install

       - name: Build package
-        run: poetry build
+        run: cd gguf-py && poetry build
       - name: Publish package
         uses: pypa/gh-action-pypi-publish@release/v1
         with:
           password: ${{ secrets.PYPI_API_TOKEN }}
+          packages-dir: gguf-py/dist
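
The gguf Python package lives in the gguf-py/ subdirectory, so the workflow now builds from there and points the publish action at the matching dist folder. The build step can be reproduced locally as a quick sanity check:

    cd gguf-py
    poetry build    # writes the sdist and wheel into gguf-py/dist/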

.github/workflows/zig-build.yml

+25

@@ -0,0 +1,25 @@
+name: Zig CI
+
+on:
+  pull_request:
+  push:
+    branches:
+      - master
+
+jobs:
+  build:
+    strategy:
+      fail-fast: false
+      matrix:
+        runs-on: [ubuntu-latest, macos-latest, windows-latest]
+    runs-on: ${{ matrix.runs-on }}
+    steps:
+      - uses: actions/checkout@v3
+        with:
+          submodules: recursive
+          fetch-depth: 0
+      - uses: goto-bus-stop/setup-zig@v2
+        with:
+          version: 0.11.0
+      - name: Build Summary
+        run: zig build --summary all -freference-trace
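
The same check runs locally with Zig 0.11.0 installed, from the repository root:

    zig build --summary all -freference-trace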

.gitignore

+4 -1

@@ -10,6 +10,7 @@
 *.gcno
 *.gcda
 *.dot
+*.metallib
 .DS_Store
 .build/
 .cache/
@@ -40,6 +41,7 @@ models-mnt
 /embedding
 /gguf
 /gguf-llama-simple
+/infill
 /libllama.so
 /llama-bench
 /main
@@ -90,4 +92,5 @@ tests/test-quantize-perf
 tests/test-sampling
 tests/test-tokenizer-0-llama
 tests/test-tokenizer-0-falcon
-tests/test-tokenizer-1
+tests/test-tokenizer-1-llama
+tests/test-tokenizer-1-bpe

CMakeLists.txt

+45 -23

@@ -1,4 +1,4 @@
-cmake_minimum_required(VERSION 3.12) # Don't bump this version for no reason
+cmake_minimum_required(VERSION 3.13) # for add_link_options
 project("llama.cpp" C CXX)

 set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
@@ -44,7 +44,7 @@ endif()

 # general
 option(LLAMA_STATIC "llama: static link libraries" OFF)
-option(LLAMA_NATIVE "llama: enable -march=native flag" OFF)
+option(LLAMA_NATIVE "llama: enable -march=native flag" ON)
 option(LLAMA_LTO "llama: enable link time optimization" OFF)

 # debug
@@ -58,15 +58,21 @@ option(LLAMA_SANITIZE_ADDRESS "llama: enable address sanitizer"
 option(LLAMA_SANITIZE_UNDEFINED "llama: enable undefined sanitizer" OFF)

 # instruction set specific
-option(LLAMA_AVX "llama: enable AVX" ON)
-option(LLAMA_AVX2 "llama: enable AVX2" ON)
-option(LLAMA_AVX512 "llama: enable AVX512" OFF)
-option(LLAMA_AVX512_VBMI "llama: enable AVX512-VBMI" OFF)
-option(LLAMA_AVX512_VNNI "llama: enable AVX512-VNNI" OFF)
-option(LLAMA_FMA "llama: enable FMA" ON)
+if (LLAMA_NATIVE)
+    set(INS_ENB OFF)
+else()
+    set(INS_ENB ON)
+endif()
+
+option(LLAMA_AVX "llama: enable AVX" ${INS_ENB})
+option(LLAMA_AVX2 "llama: enable AVX2" ${INS_ENB})
+option(LLAMA_AVX512 "llama: enable AVX512" OFF)
+option(LLAMA_AVX512_VBMI "llama: enable AVX512-VBMI" OFF)
+option(LLAMA_AVX512_VNNI "llama: enable AVX512-VNNI" OFF)
+option(LLAMA_FMA "llama: enable FMA" ${INS_ENB})
 # in MSVC F16C is implied with AVX2/AVX512
 if (NOT MSVC)
-    option(LLAMA_F16C "llama: enable F16C" ON)
+    option(LLAMA_F16C "llama: enable F16C" ${INS_ENB})
 endif()

 # 3rd party libs
@@ -343,8 +349,9 @@ if (LLAMA_MPI)
     set(GGML_SOURCES_MPI ggml-mpi.c ggml-mpi.h)
     add_compile_definitions(GGML_USE_MPI)
     add_compile_definitions(${MPI_C_COMPILE_DEFINITIONS})
-    set(cxx_flags ${cxx_flags} -Wno-cast-qual)
-    set(c_flags ${c_flags} -Wno-cast-qual)
+    if (NOT MSVC)
+        add_compile_options(-Wno-cast-qual)
+    endif()
     set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} ${MPI_C_LIBRARIES})
     set(LLAMA_EXTRA_INCLUDES ${LLAMA_EXTRA_INCLUDES} ${MPI_C_INCLUDE_DIRS})
     # Even if you're only using the C header, C++ programs may bring in MPI
@@ -418,10 +425,11 @@ if (LLAMA_ALL_WARNINGS)
     set(c_flags -Wshadow -Wstrict-prototypes -Wpointer-arith -Wmissing-prototypes -Werror=implicit-int
                 -Werror=implicit-function-declaration)
     set(cxx_flags -Wmissing-declarations -Wmissing-noreturn)
+    set(host_cxx_flags "")

     if (CMAKE_C_COMPILER_ID MATCHES "Clang")
         set(warning_flags ${warning_flags} -Wunreachable-code-break -Wunreachable-code-return)
-        set(cxx_flags ${cxx_flags} -Wmissing-prototypes -Wextra-semi)
+        set(host_cxx_flags ${host_cxx_flags} -Wmissing-prototypes -Wextra-semi)

         if (
             (CMAKE_C_COMPILER_ID STREQUAL "Clang" AND CMAKE_C_COMPILER_VERSION VERSION_GREATER_EQUAL 3.8.0) OR
@@ -431,27 +439,38 @@ if (LLAMA_ALL_WARNINGS)
         endif()
     elseif (CMAKE_C_COMPILER_ID STREQUAL "GNU")
         set(c_flags ${c_flags} -Wdouble-promotion)
-        set(cxx_flags ${cxx_flags} -Wno-array-bounds)
+        set(host_cxx_flags ${host_cxx_flags} -Wno-array-bounds)

         if (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 7.1.0)
-            set(cxx_flags ${cxx_flags} -Wno-format-truncation)
+            set(host_cxx_flags ${host_cxx_flags} -Wno-format-truncation)
         endif()
         if (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 8.1.0)
-            set(cxx_flags ${cxx_flags} -Wextra-semi)
+            set(host_cxx_flags ${host_cxx_flags} -Wextra-semi)
         endif()
     else()
         # todo : msvc
     endif()

-    add_compile_options(
-        ${warning_flags}
-        "$<$<COMPILE_LANGUAGE:C>:${c_flags}>"
-        "$<$<COMPILE_LANGUAGE:CXX>:${cxx_flags}>"
-    )
+    set(c_flags ${c_flags} ${warning_flags})
+    set(cxx_flags ${cxx_flags} ${warning_flags})
+    add_compile_options("$<$<COMPILE_LANGUAGE:C>:${c_flags}>"
+                        "$<$<COMPILE_LANGUAGE:CXX>:${cxx_flags} ${host_cxx_flags}>")

 endif()

+if (NOT MSVC)
+    set(cuda_flags -Wno-pedantic)
+endif()
+set(cuda_flags ${cxx_flags} -use_fast_math ${cuda_flags})
+
+list(JOIN host_cxx_flags " " cuda_host_flags) # pass host compiler flags as a single argument
+if (NOT cuda_host_flags STREQUAL "")
+    set(cuda_flags ${cuda_flags} -Xcompiler ${cuda_host_flags})
+endif()
+
+add_compile_options("$<$<COMPILE_LANGUAGE:CUDA>:${cuda_flags}>")
+
 if (WIN32)
     add_compile_definitions(_CRT_SECURE_NO_WARNINGS)

@@ -491,9 +510,6 @@ if (NOT MSVC)
     if (LLAMA_GPROF)
        add_compile_options(-pg)
     endif()
-    if (LLAMA_NATIVE)
-        add_compile_options(-march=native)
-    endif()
 endif()

 if ((${CMAKE_SYSTEM_PROCESSOR} MATCHES "arm") OR (${CMAKE_SYSTEM_PROCESSOR} MATCHES "aarch64") OR ("${CMAKE_GENERATOR_PLATFORM_LWR}" MATCHES "arm64"))
@@ -548,6 +564,9 @@ elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "^(x86_64|i686|AMD64)$" OR "${CMAKE_GE
        add_compile_options($<$<COMPILE_LANGUAGE:CXX>:/arch:AVX>)
     endif()
     else()
+        if (LLAMA_NATIVE)
+            add_compile_options(-march=native)
+        endif()
         if (LLAMA_F16C)
             add_compile_options(-mf16c)
         endif()
@@ -644,6 +663,8 @@ add_library(ggml OBJECT
             ggml.h
             ggml-alloc.c
             ggml-alloc.h
+            ggml-backend.c
+            ggml-backend.h
             ${GGML_SOURCES_CUDA} ${GGML_HEADERS_CUDA}
             ${GGML_SOURCES_OPENCL} ${GGML_HEADERS_OPENCL}
             ${GGML_SOURCES_METAL} ${GGML_HEADERS_METAL}
@@ -705,6 +726,7 @@ set(LLAMA_BIN_INSTALL_DIR ${CMAKE_INSTALL_BINDIR}
 set(LLAMA_BUILD_NUMBER ${BUILD_NUMBER})
 set(LLAMA_BUILD_COMMIT ${BUILD_COMMIT})
 set(LLAMA_INSTALL_VERSION 0.0.${BUILD_NUMBER})
+get_directory_property(LLAMA_TRANSIENT_DEFINES COMPILE_DEFINITIONS)

 configure_package_config_file(
     ${CMAKE_CURRENT_SOURCE_DIR}/scripts/LlamaConfig.cmake.in
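
With LLAMA_NATIVE defaulting to ON, a plain configure now tunes the binary to the build machine via -march=native, while the INS_ENB switch gives the per-instruction-set options (LLAMA_AVX, LLAMA_AVX2, LLAMA_FMA, LLAMA_F16C) their old ON defaults only when LLAMA_NATIVE is off. A sketch of the two modes:

    # new default: optimized for the local CPU
    cmake ..
    # portable build: pick instruction sets explicitly
    cmake .. -DLLAMA_NATIVE=OFF -DLLAMA_AVX=ON -DLLAMA_AVX2=ON -DLLAMA_FMA=ON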

Makefile

+16 -5

@@ -1,8 +1,8 @@
 # Define the default target now so that it is always the first target
-BUILD_TARGETS = main quantize quantize-stats perplexity embedding vdot q8dot train-text-from-scratch convert-llama2c-to-ggml simple batched save-load-state server embd-input-test gguf llama-bench baby-llama beam-search speculative benchmark-matmult parallel finetune export-lora tests/test-c.o
+BUILD_TARGETS = main quantize quantize-stats perplexity embedding vdot q8dot train-text-from-scratch convert-llama2c-to-ggml simple batched save-load-state server embd-input-test gguf llama-bench baby-llama beam-search speculative infill benchmark-matmult parallel finetune export-lora tests/test-c.o

 # Binaries only useful for tests
-TEST_TARGETS = tests/test-llama-grammar tests/test-grammar-parser tests/test-double-float tests/test-grad0 tests/test-opt tests/test-quantize-fns tests/test-quantize-perf tests/test-sampling tests/test-tokenizer-0-llama tests/test-tokenizer-0-falcon tests/test-tokenizer-1-llama
+TEST_TARGETS = tests/test-llama-grammar tests/test-grammar-parser tests/test-double-float tests/test-grad0 tests/test-opt tests/test-quantize-fns tests/test-quantize-perf tests/test-sampling tests/test-tokenizer-0-llama tests/test-tokenizer-0-falcon tests/test-tokenizer-1-llama tests/test-tokenizer-1-bpe

 # Code coverage output files
 COV_TARGETS = *.gcno tests/*.gcno *.gcda tests/*.gcda *.gcov tests/*.gcov lcov-report gcovr-report
@@ -62,9 +62,11 @@ test: $(TEST_TARGETS)
 		if [ "$$test_target" = "tests/test-tokenizer-0-llama" ]; then \
 			./$$test_target $(CURDIR)/models/ggml-vocab-llama.gguf; \
 		elif [ "$$test_target" = "tests/test-tokenizer-0-falcon" ]; then \
-			continue; \
+			./$$test_target $(CURDIR)/models/ggml-vocab-falcon.gguf; \
 		elif [ "$$test_target" = "tests/test-tokenizer-1-llama" ]; then \
 			continue; \
+		elif [ "$$test_target" = "tests/test-tokenizer-1-bpe" ]; then \
+			continue; \
 		else \
 			echo "Running test $$test_target..."; \
 			./$$test_target; \
@@ -510,9 +512,12 @@ ggml.o: ggml.c ggml.h ggml-cuda.h
 ggml-alloc.o: ggml-alloc.c ggml.h ggml-alloc.h
 	$(CC) $(CFLAGS) -c $< -o $@

-OBJS += ggml-alloc.o
+ggml-backend.o: ggml-backend.c ggml.h ggml-backend.h
+	$(CC) $(CFLAGS) -c $< -o $@
+
+OBJS += ggml-alloc.o ggml-backend.o

-llama.o: llama.cpp ggml.h ggml-alloc.h ggml-cuda.h ggml-metal.h llama.h
+llama.o: llama.cpp ggml.h ggml-alloc.h ggml-backend.h ggml-cuda.h ggml-metal.h llama.h
 	$(CXX) $(CXXFLAGS) -c $< -o $@

 common.o: common/common.cpp common/common.h build-info.h common/log.h
@@ -543,6 +548,9 @@ main: examples/main/main.cpp build-info.h ggml.
 	@echo '==== Run ./main -h for help. ===='
 	@echo

+infill: examples/infill/infill.cpp build-info.h ggml.o llama.o common.o console.o grammar-parser.o $(OBJS)
+	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
+
 simple: examples/simple/simple.cpp build-info.h ggml.o llama.o common.o $(OBJS)
 	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)

@@ -667,6 +675,9 @@ tests/test-tokenizer-0-falcon: tests/test-tokenizer-0-falcon.cpp build-info.h gg
 tests/test-tokenizer-0-llama: tests/test-tokenizer-0-llama.cpp build-info.h ggml.o llama.o common.o $(OBJS)
 	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)

+tests/test-tokenizer-1-bpe: tests/test-tokenizer-1-bpe.cpp build-info.h ggml.o llama.o common.o $(OBJS)
+	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
+
 tests/test-tokenizer-1-llama: tests/test-tokenizer-1-llama.cpp build-info.h ggml.o llama.o common.o $(OBJS)
 	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
