nalbion commited on
Commit
3b6a58b
·
unverified ·
1 Parent(s): 828e876

bindings : add java bindings (#931)

Browse files

* WIP - java bindings

* updated README

* failed attempt at JNI

* fullTranscribe() test passes

* tested on Ubuntu 20

* link to Java bindings

Files changed (36) hide show
  1. .gitignore +46 -43
  2. README.md +1 -0
  3. bindings/java/.idea/uiDesigner.xml +124 -0
  4. bindings/java/CMakeLists.txt +50 -0
  5. bindings/java/README.md +63 -0
  6. bindings/java/build.gradle +104 -0
  7. bindings/java/gradle.properties +6 -0
  8. bindings/java/gradle/wrapper/gradle-wrapper.jar +0 -0
  9. bindings/java/gradle/wrapper/gradle-wrapper.properties +6 -0
  10. bindings/java/gradlew +244 -0
  11. bindings/java/gradlew.bat +92 -0
  12. bindings/java/settings.gradle +1 -0
  13. bindings/java/src/main/cpp/whisper_java.cpp +33 -0
  14. bindings/java/src/main/cpp/whisper_java.h +24 -0
  15. bindings/java/src/main/java/io/github/ggerganov/whispercpp/WhisperContext.java +39 -0
  16. bindings/java/src/main/java/io/github/ggerganov/whispercpp/WhisperCpp.java +124 -0
  17. bindings/java/src/main/java/io/github/ggerganov/whispercpp/WhisperCppJnaLibrary.java +365 -0
  18. bindings/java/src/main/java/io/github/ggerganov/whispercpp/WhisperJavaJnaLibrary.java +23 -0
  19. bindings/java/src/main/java/io/github/ggerganov/whispercpp/callbacks/WhisperEncoderBeginCallback.java +24 -0
  20. bindings/java/src/main/java/io/github/ggerganov/whispercpp/callbacks/WhisperLogitsFilterCallback.java +28 -0
  21. bindings/java/src/main/java/io/github/ggerganov/whispercpp/callbacks/WhisperNewSegmentCallback.java +24 -0
  22. bindings/java/src/main/java/io/github/ggerganov/whispercpp/callbacks/WhisperProgressCallback.java +23 -0
  23. bindings/java/src/main/java/io/github/ggerganov/whispercpp/ggml/GgmlTensor.java +4 -0
  24. bindings/java/src/main/java/io/github/ggerganov/whispercpp/ggml/GgmlType.java +18 -0
  25. bindings/java/src/main/java/io/github/ggerganov/whispercpp/model/EModel.java +10 -0
  26. bindings/java/src/main/java/io/github/ggerganov/whispercpp/model/WhisperModel.java +49 -0
  27. bindings/java/src/main/java/io/github/ggerganov/whispercpp/model/WhisperModelLoader.java +62 -0
  28. bindings/java/src/main/java/io/github/ggerganov/whispercpp/model/WhisperState.java +4 -0
  29. bindings/java/src/main/java/io/github/ggerganov/whispercpp/model/WhisperTokenData.java +50 -0
  30. bindings/java/src/main/java/io/github/ggerganov/whispercpp/params/WhisperFilters.java +10 -0
  31. bindings/java/src/main/java/io/github/ggerganov/whispercpp/params/WhisperFullParams.java +187 -0
  32. bindings/java/src/main/java/io/github/ggerganov/whispercpp/params/WhisperHParams.java +15 -0
  33. bindings/java/src/main/java/io/github/ggerganov/whispercpp/params/WhisperJavaParams.java +7 -0
  34. bindings/java/src/main/java/io/github/ggerganov/whispercpp/params/WhisperSamplingStrategy.java +10 -0
  35. bindings/java/src/test/java/io/github/ggerganov/whispercpp/WhisperCppTest.java +75 -0
  36. bindings/java/src/test/java/io/github/ggerganov/whispercpp/WhisperJnaLibraryTest.java +17 -0
.gitignore CHANGED
@@ -1,43 +1,46 @@
1
- *.o
2
- *.a
3
- .cache/
4
- .coreml/
5
- .test/
6
- .vs/
7
- .vscode/
8
- .DS_Store
9
-
10
- build/
11
- build-em/
12
- build-debug/
13
- build-release/
14
- build-static/
15
- build-cublas/
16
- build-no-accel/
17
- build-sanitize-addr/
18
- build-sanitize-thread/
19
-
20
- /main
21
- /stream
22
- /command
23
- /talk
24
- /talk-llama
25
- /bench
26
- /quantize
27
-
28
- arm_neon.h
29
- sync.sh
30
- libwhisper.a
31
- libwhisper.so
32
- compile_commands.json
33
-
34
- examples/arm_neon.h
35
- examples/whisper.objc/whisper.objc.xcodeproj/xcshareddata
36
- examples/whisper.objc/whisper.objc.xcodeproj/xcuserdata/
37
- examples/whisper.objc/whisper.objc.xcodeproj/project.xcworkspace/xcuserdata
38
-
39
- extra/bench-gg.txt
40
-
41
- models/*.mlmodel
42
- models/*.mlmodelc
43
- models/*.mlpackage
 
 
 
 
1
+ *.o
2
+ *.a
3
+ .cache/
4
+ .coreml/
5
+ .test/
6
+ .vs/
7
+ .vscode/
8
+ .DS_Store
9
+
10
+ build/
11
+ build-em/
12
+ build-debug/
13
+ build-release/
14
+ build-static/
15
+ build-cublas/
16
+ build-no-accel/
17
+ build-sanitize-addr/
18
+ build-sanitize-thread/
19
+
20
+ /main
21
+ /stream
22
+ /command
23
+ /talk
24
+ /talk-llama
25
+ /bench
26
+ /quantize
27
+
28
+ arm_neon.h
29
+ sync.sh
30
+ libwhisper.a
31
+ libwhisper.so
32
+ compile_commands.json
33
+
34
+ examples/arm_neon.h
35
+ examples/whisper.objc/whisper.objc.xcodeproj/xcshareddata
36
+ examples/whisper.objc/whisper.objc.xcodeproj/xcuserdata/
37
+ examples/whisper.objc/whisper.objc.xcodeproj/project.xcworkspace/xcuserdata
38
+
39
+ extra/bench-gg.txt
40
+
41
+ models/*.mlmodel
42
+ models/*.mlmodelc
43
+ models/*.mlpackage
44
+ bindings/java/.gradle/
45
+ bindings/java/.idea/
46
+ .idea/
README.md CHANGED
@@ -28,6 +28,7 @@ Supported platforms:
28
  - [x] Mac OS (Intel and Arm)
29
  - [x] [iOS](examples/whisper.objc)
30
  - [x] [Android](examples/whisper.android)
 
31
  - [x] Linux / [FreeBSD](https://github.com/ggerganov/whisper.cpp/issues/56#issuecomment-1350920264)
32
  - [x] [WebAssembly](examples/whisper.wasm)
33
  - [x] Windows ([MSVC](https://github.com/ggerganov/whisper.cpp/blob/master/.github/workflows/build.yml#L117-L144) and [MinGW](https://github.com/ggerganov/whisper.cpp/issues/168))
 
28
  - [x] Mac OS (Intel and Arm)
29
  - [x] [iOS](examples/whisper.objc)
30
  - [x] [Android](examples/whisper.android)
31
+ - [x] [Java](bindings/java/README.md)
32
  - [x] Linux / [FreeBSD](https://github.com/ggerganov/whisper.cpp/issues/56#issuecomment-1350920264)
33
  - [x] [WebAssembly](examples/whisper.wasm)
34
  - [x] Windows ([MSVC](https://github.com/ggerganov/whisper.cpp/blob/master/.github/workflows/build.yml#L117-L144) and [MinGW](https://github.com/ggerganov/whisper.cpp/issues/168))
bindings/java/.idea/uiDesigner.xml ADDED
@@ -0,0 +1,124 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="Palette2">
4
+ <group name="Swing">
5
+ <item class="com.intellij.uiDesigner.HSpacer" tooltip-text="Horizontal Spacer" icon="/com/intellij/uiDesigner/icons/hspacer.svg" removable="false" auto-create-binding="false" can-attach-label="false">
6
+ <default-constraints vsize-policy="1" hsize-policy="6" anchor="0" fill="1" />
7
+ </item>
8
+ <item class="com.intellij.uiDesigner.VSpacer" tooltip-text="Vertical Spacer" icon="/com/intellij/uiDesigner/icons/vspacer.svg" removable="false" auto-create-binding="false" can-attach-label="false">
9
+ <default-constraints vsize-policy="6" hsize-policy="1" anchor="0" fill="2" />
10
+ </item>
11
+ <item class="javax.swing.JPanel" icon="/com/intellij/uiDesigner/icons/panel.svg" removable="false" auto-create-binding="false" can-attach-label="false">
12
+ <default-constraints vsize-policy="3" hsize-policy="3" anchor="0" fill="3" />
13
+ </item>
14
+ <item class="javax.swing.JScrollPane" icon="/com/intellij/uiDesigner/icons/scrollPane.svg" removable="false" auto-create-binding="false" can-attach-label="true">
15
+ <default-constraints vsize-policy="7" hsize-policy="7" anchor="0" fill="3" />
16
+ </item>
17
+ <item class="javax.swing.JButton" icon="/com/intellij/uiDesigner/icons/button.svg" removable="false" auto-create-binding="true" can-attach-label="false">
18
+ <default-constraints vsize-policy="0" hsize-policy="3" anchor="0" fill="1" />
19
+ <initial-values>
20
+ <property name="text" value="Button" />
21
+ </initial-values>
22
+ </item>
23
+ <item class="javax.swing.JRadioButton" icon="/com/intellij/uiDesigner/icons/radioButton.svg" removable="false" auto-create-binding="true" can-attach-label="false">
24
+ <default-constraints vsize-policy="0" hsize-policy="3" anchor="8" fill="0" />
25
+ <initial-values>
26
+ <property name="text" value="RadioButton" />
27
+ </initial-values>
28
+ </item>
29
+ <item class="javax.swing.JCheckBox" icon="/com/intellij/uiDesigner/icons/checkBox.svg" removable="false" auto-create-binding="true" can-attach-label="false">
30
+ <default-constraints vsize-policy="0" hsize-policy="3" anchor="8" fill="0" />
31
+ <initial-values>
32
+ <property name="text" value="CheckBox" />
33
+ </initial-values>
34
+ </item>
35
+ <item class="javax.swing.JLabel" icon="/com/intellij/uiDesigner/icons/label.svg" removable="false" auto-create-binding="false" can-attach-label="false">
36
+ <default-constraints vsize-policy="0" hsize-policy="0" anchor="8" fill="0" />
37
+ <initial-values>
38
+ <property name="text" value="Label" />
39
+ </initial-values>
40
+ </item>
41
+ <item class="javax.swing.JTextField" icon="/com/intellij/uiDesigner/icons/textField.svg" removable="false" auto-create-binding="true" can-attach-label="true">
42
+ <default-constraints vsize-policy="0" hsize-policy="6" anchor="8" fill="1">
43
+ <preferred-size width="150" height="-1" />
44
+ </default-constraints>
45
+ </item>
46
+ <item class="javax.swing.JPasswordField" icon="/com/intellij/uiDesigner/icons/passwordField.svg" removable="false" auto-create-binding="true" can-attach-label="true">
47
+ <default-constraints vsize-policy="0" hsize-policy="6" anchor="8" fill="1">
48
+ <preferred-size width="150" height="-1" />
49
+ </default-constraints>
50
+ </item>
51
+ <item class="javax.swing.JFormattedTextField" icon="/com/intellij/uiDesigner/icons/formattedTextField.svg" removable="false" auto-create-binding="true" can-attach-label="true">
52
+ <default-constraints vsize-policy="0" hsize-policy="6" anchor="8" fill="1">
53
+ <preferred-size width="150" height="-1" />
54
+ </default-constraints>
55
+ </item>
56
+ <item class="javax.swing.JTextArea" icon="/com/intellij/uiDesigner/icons/textArea.svg" removable="false" auto-create-binding="true" can-attach-label="true">
57
+ <default-constraints vsize-policy="6" hsize-policy="6" anchor="0" fill="3">
58
+ <preferred-size width="150" height="50" />
59
+ </default-constraints>
60
+ </item>
61
+ <item class="javax.swing.JTextPane" icon="/com/intellij/uiDesigner/icons/textPane.svg" removable="false" auto-create-binding="true" can-attach-label="true">
62
+ <default-constraints vsize-policy="6" hsize-policy="6" anchor="0" fill="3">
63
+ <preferred-size width="150" height="50" />
64
+ </default-constraints>
65
+ </item>
66
+ <item class="javax.swing.JEditorPane" icon="/com/intellij/uiDesigner/icons/editorPane.svg" removable="false" auto-create-binding="true" can-attach-label="true">
67
+ <default-constraints vsize-policy="6" hsize-policy="6" anchor="0" fill="3">
68
+ <preferred-size width="150" height="50" />
69
+ </default-constraints>
70
+ </item>
71
+ <item class="javax.swing.JComboBox" icon="/com/intellij/uiDesigner/icons/comboBox.svg" removable="false" auto-create-binding="true" can-attach-label="true">
72
+ <default-constraints vsize-policy="0" hsize-policy="2" anchor="8" fill="1" />
73
+ </item>
74
+ <item class="javax.swing.JTable" icon="/com/intellij/uiDesigner/icons/table.svg" removable="false" auto-create-binding="true" can-attach-label="false">
75
+ <default-constraints vsize-policy="6" hsize-policy="6" anchor="0" fill="3">
76
+ <preferred-size width="150" height="50" />
77
+ </default-constraints>
78
+ </item>
79
+ <item class="javax.swing.JList" icon="/com/intellij/uiDesigner/icons/list.svg" removable="false" auto-create-binding="true" can-attach-label="false">
80
+ <default-constraints vsize-policy="6" hsize-policy="2" anchor="0" fill="3">
81
+ <preferred-size width="150" height="50" />
82
+ </default-constraints>
83
+ </item>
84
+ <item class="javax.swing.JTree" icon="/com/intellij/uiDesigner/icons/tree.svg" removable="false" auto-create-binding="true" can-attach-label="false">
85
+ <default-constraints vsize-policy="6" hsize-policy="6" anchor="0" fill="3">
86
+ <preferred-size width="150" height="50" />
87
+ </default-constraints>
88
+ </item>
89
+ <item class="javax.swing.JTabbedPane" icon="/com/intellij/uiDesigner/icons/tabbedPane.svg" removable="false" auto-create-binding="true" can-attach-label="false">
90
+ <default-constraints vsize-policy="3" hsize-policy="3" anchor="0" fill="3">
91
+ <preferred-size width="200" height="200" />
92
+ </default-constraints>
93
+ </item>
94
+ <item class="javax.swing.JSplitPane" icon="/com/intellij/uiDesigner/icons/splitPane.svg" removable="false" auto-create-binding="false" can-attach-label="false">
95
+ <default-constraints vsize-policy="3" hsize-policy="3" anchor="0" fill="3">
96
+ <preferred-size width="200" height="200" />
97
+ </default-constraints>
98
+ </item>
99
+ <item class="javax.swing.JSpinner" icon="/com/intellij/uiDesigner/icons/spinner.svg" removable="false" auto-create-binding="true" can-attach-label="true">
100
+ <default-constraints vsize-policy="0" hsize-policy="6" anchor="8" fill="1" />
101
+ </item>
102
+ <item class="javax.swing.JSlider" icon="/com/intellij/uiDesigner/icons/slider.svg" removable="false" auto-create-binding="true" can-attach-label="false">
103
+ <default-constraints vsize-policy="0" hsize-policy="6" anchor="8" fill="1" />
104
+ </item>
105
+ <item class="javax.swing.JSeparator" icon="/com/intellij/uiDesigner/icons/separator.svg" removable="false" auto-create-binding="false" can-attach-label="false">
106
+ <default-constraints vsize-policy="6" hsize-policy="6" anchor="0" fill="3" />
107
+ </item>
108
+ <item class="javax.swing.JProgressBar" icon="/com/intellij/uiDesigner/icons/progressbar.svg" removable="false" auto-create-binding="true" can-attach-label="false">
109
+ <default-constraints vsize-policy="0" hsize-policy="6" anchor="0" fill="1" />
110
+ </item>
111
+ <item class="javax.swing.JToolBar" icon="/com/intellij/uiDesigner/icons/toolbar.svg" removable="false" auto-create-binding="false" can-attach-label="false">
112
+ <default-constraints vsize-policy="0" hsize-policy="6" anchor="0" fill="1">
113
+ <preferred-size width="-1" height="20" />
114
+ </default-constraints>
115
+ </item>
116
+ <item class="javax.swing.JToolBar$Separator" icon="/com/intellij/uiDesigner/icons/toolbarSeparator.svg" removable="false" auto-create-binding="false" can-attach-label="false">
117
+ <default-constraints vsize-policy="0" hsize-policy="0" anchor="0" fill="1" />
118
+ </item>
119
+ <item class="javax.swing.JScrollBar" icon="/com/intellij/uiDesigner/icons/scrollbar.svg" removable="false" auto-create-binding="true" can-attach-label="false">
120
+ <default-constraints vsize-policy="6" hsize-policy="0" anchor="0" fill="2" />
121
+ </item>
122
+ </group>
123
+ </component>
124
+ </project>
bindings/java/CMakeLists.txt ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ cmake_minimum_required(VERSION 3.10)
2
+
3
+ project(whisper_java VERSION 1.4.2)
4
+
5
+ # Set the target name and source file/s
6
+ set(TARGET_NAME whisper_java)
7
+ set(SOURCES src/main/cpp/whisper_java.cpp)
8
+
9
+ # include <whisper.h>
10
+ include_directories(../../)
11
+
12
+ # Set the output directory for the DLL/shared library based on the platform as required by JNA
13
+ if(WIN32)
14
+ set(OUTPUT_DIR ${CMAKE_CURRENT_BINARY_DIR}/generated/resources/main/win32-x86-64)
15
+ elseif(UNIX AND NOT APPLE)
16
+ set(OUTPUT_DIR ${CMAKE_CURRENT_BINARY_DIR}/generated/resources/main/linux-x86-64)
17
+ elseif(APPLE)
18
+ set(OUTPUT_DIR ${CMAKE_CURRENT_BINARY_DIR}/generated/resources/main/macos-x86-64)
19
+ endif()
20
+
21
+ set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${OUTPUT_DIR})
22
+ set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${OUTPUT_DIR})
23
+
24
+ # Create the whisper_java library
25
+ add_library(${TARGET_NAME} SHARED ${SOURCES})
26
+
27
+ # Link against ../../build/Release/whisper.dll (or so/dynlib)
28
+ target_link_directories(${TARGET_NAME} PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/../../../build/${CMAKE_BUILD_TYPE})
29
+ target_link_libraries(${TARGET_NAME} PRIVATE whisper)
30
+
31
+ # Set the appropriate compiler flags for Windows, Linux, and macOS
32
+ if(WIN32)
33
+ target_compile_options(${TARGET_NAME} PRIVATE /W4 /D_CRT_SECURE_NO_WARNINGS)
34
+ elseif(UNIX AND NOT APPLE)
35
+ target_compile_options(${TARGET_NAME} PRIVATE -Wall -Wextra)
36
+ elseif(APPLE)
37
+ target_compile_options(${TARGET_NAME} PRIVATE -Wall -Wextra)
38
+ endif()
39
+
40
+ target_compile_definitions(${TARGET_NAME} PRIVATE WHISPER_SHARED)
41
+ # add_definitions(-DWHISPER_SHARED)
42
+
43
+ # Force CMake to save the libs to build/generated/resources/main/${os}-${arch} as required by JNA
44
+ foreach(OUTPUTCONFIG ${CMAKE_CONFIGURATION_TYPES})
45
+ string(TOUPPER ${OUTPUTCONFIG} OUTPUTCONFIG)
46
+ set_target_properties(${TARGET_NAME} PROPERTIES
47
+ RUNTIME_OUTPUT_DIRECTORY_${OUTPUTCONFIG} ${OUTPUT_DIR}
48
+ LIBRARY_OUTPUT_DIRECTORY_${OUTPUTCONFIG} ${OUTPUT_DIR}
49
+ ARCHIVE_OUTPUT_DIRECTORY_${OUTPUTCONFIG} ${OUTPUT_DIR})
50
+ endforeach(OUTPUTCONFIG CMAKE_CONFIGURATION_TYPES)
bindings/java/README.md ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Java JNA bindings for Whisper
2
+
3
+ This package provides Java JNA bindings for whisper.cpp. They have been tested on:
4
+
5
+ * <strike>Darwin (OS X) 12.6 on x86_64</strike>
6
+ * Ubuntu on x86_64
7
+ * Windows on x86_64
8
+
9
+ The "low level" bindings are in `WhisperCppJnaLibrary` and `WhisperJavaJnaLibrary` which caches `whisper_full_params` and `whisper_context` in `whisper_java.cpp`.
10
+
11
+ There are a lot of classes in the `callbacks`, `ggml`, `model` and `params` directories but most of them have not been tested.
12
+
13
+ The most simple usage is as follows:
14
+
15
+ ```java
16
+ import io.github.ggerganov.whispercpp.WhisperCpp;
17
+
18
+ public class Example {
19
+
20
+ public static void main(String[] args) {
21
+ String modelpath;
22
+ WhisperCpp whisper = new WhisperCpp();
23
+ // By default, models are loaded from ~/.cache/whisper/ and are usually named "ggml-${name}.bin"
24
+ // or you can provide the absolute path to the model file.
25
+ whisper.initContext("base.en");
26
+
27
+ long context = whisper.initContext(modelpath);
28
+ try {
29
+ whisper.fullTranscribe(context, samples);
30
+
31
+ int segmentCount = whisper.getTextSegmentCount(context);
32
+ for (int i = 0; i < segmentCount; i++) {
33
+ String text = whisper.getTextSegment(context, i);
34
+ System.out.println(text);
35
+ }
36
+ } finally {
37
+ whisper.freeContext(context);
38
+ }
39
+ }
40
+ }
41
+ ```
42
+
43
+ ## Building & Testing
44
+
45
+ In order to build, you need to have JDK 8 or higher installed. Run the tests with:
46
+
47
+ ```bash
48
+ git clone https://github.com/ggerganov/whisper.cpp.git
49
+ cd whisper.cpp/bindings/java
50
+
51
+ mkdir build
52
+ pushd build
53
+ cmake ..
54
+ cmake --build .
55
+ popd
56
+
57
+ ./gradlew build
58
+ ```
59
+
60
+ ## License
61
+
62
+ The license for the Java bindings is the same as the license for the rest of the whisper.cpp project, which is the MIT License. See the `LICENSE` file for more details.
63
+
bindings/java/build.gradle ADDED
@@ -0,0 +1,104 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ plugins {
2
+ id 'java'
3
+ id 'java-library'
4
+ id 'maven-publish'
5
+ }
6
+
7
+ archivesBaseName = 'whispercpp'
8
+ group = 'io.github.ggerganov'
9
+ version = '1.4.0'
10
+
11
+ sourceCompatibility = 1.8
12
+ targetCompatibility = 1.8
13
+
14
+ sourceSets {
15
+ main {
16
+ resources {
17
+ srcDirs = ['src/main/resources', 'build/generated/resources/main']
18
+ }
19
+ }
20
+ test {
21
+ runtimeClasspath += files('build/generated/resources/main')
22
+ }
23
+ }
24
+
25
+ tasks.register('copyLibwhisperSo', Copy) {
26
+ from '../../build'
27
+ include 'libwhisper.so'
28
+ into 'build/generated/resources/main/linux-x86-64'
29
+ }
30
+
31
+ tasks.register('copyWhisperDll', Copy) { // stage whisper.dll where JNA can load it from the classpath
32
+ from '../../build/Release'
33
+ include 'whisper.dll'
34
+ into 'build/generated/resources/main/win32-x86-64' // JNA's resource prefix for 64-bit Windows; matches the CMake output dir for whisper_java
35
+ }
36
+
37
+ tasks.build.dependsOn copyLibwhisperSo, copyWhisperDll
38
+
39
+ test {
40
+ systemProperty 'jna.library.path', project.file('build/generated/resources/main').absolutePath
41
+ }
42
+
43
+ java {
44
+ withSourcesJar()
45
+ withJavadocJar()
46
+ }
47
+
48
+ jar {
49
+ exclude '**/whisper_java.exp', '**/whisper_java.lib'
50
+ }
51
+
52
+ javadoc {
53
+ options.addStringOption('Xdoclint:none', '-quiet')
54
+ }
55
+
56
+ tasks.withType(Test) {
57
+ useJUnitPlatform()
58
+ }
59
+
60
+ dependencies {
61
+ implementation "net.java.dev.jna:jna:5.13.0"
62
+ testImplementation "org.junit.jupiter:junit-jupiter:5.9.2"
63
+ testImplementation "org.assertj:assertj-core:3.24.2"
64
+ }
65
+
66
+ repositories {
67
+ mavenCentral()
68
+ }
69
+
70
+ publishing {
71
+ publications {
72
+ mavenJava(MavenPublication) {
73
+ artifactId = 'whispercpp'
74
+ from components.java
75
+ pom {
76
+ name = 'whispercpp'
77
+ description = "Java JNA bindings for OpenAI's Whisper model, implemented in C/C++"
78
+ url = 'https://github.com/ggerganov/whisper.cpp'
79
+ licenses {
80
+ license {
81
+ name = 'MIT licence'
82
+ url = 'https://raw.githubusercontent.com/ggerganov/whisper.cpp/master/LICENSE'
83
+ }
84
+ }
85
+ developers {
86
+ developer {
87
+ id = 'ggerganov'
88
+ name = 'Georgi Gerganov'
89
+ email = '[email protected]'
90
+ }
91
+ developer {
92
+ id = 'nalbion'
93
+ name = 'Nicholas Albion'
94
+ email = '[email protected]'
95
+ }
96
+ }
97
+ scm {
98
+ connection = 'scm:git:git://github.com/ggerganov/whisper.cpp.git'
99
+ url = 'https://github.com/ggerganov/whisper.cpp'
100
+ }
101
+ }
102
+ }
103
+ }
104
+ }
bindings/java/gradle.properties ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ org.gradle.jvmargs=-Xms256m -Xmx1024m
2
+ system.include.dir=/usr/include
3
+ #system.local.include.dir=../../include
4
+ system.local.include.dir=./build/generated/sources/headers/java/main
5
+ jni.include.dir=/usr/lib/jvm/java-8-openjdk-amd64/include/
6
+ jni.lib.dir=/usr/lib/jvm/java-8-openjdk-amd64/lib/
bindings/java/gradle/wrapper/gradle-wrapper.jar ADDED
Binary file (61.6 kB). View file
 
bindings/java/gradle/wrapper/gradle-wrapper.properties ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ distributionBase=GRADLE_USER_HOME
2
+ distributionPath=wrapper/dists
3
+ distributionUrl=https\://services.gradle.org/distributions/gradle-8.1-bin.zip
4
+ networkTimeout=10000
5
+ zipStoreBase=GRADLE_USER_HOME
6
+ zipStorePath=wrapper/dists
bindings/java/gradlew ADDED
@@ -0,0 +1,244 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/bin/sh
2
+
3
+ #
4
+ # Copyright © 2015-2021 the original authors.
5
+ #
6
+ # Licensed under the Apache License, Version 2.0 (the "License");
7
+ # you may not use this file except in compliance with the License.
8
+ # You may obtain a copy of the License at
9
+ #
10
+ # https://www.apache.org/licenses/LICENSE-2.0
11
+ #
12
+ # Unless required by applicable law or agreed to in writing, software
13
+ # distributed under the License is distributed on an "AS IS" BASIS,
14
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
+ # See the License for the specific language governing permissions and
16
+ # limitations under the License.
17
+ #
18
+
19
+ ##############################################################################
20
+ #
21
+ # Gradle start up script for POSIX generated by Gradle.
22
+ #
23
+ # Important for running:
24
+ #
25
+ # (1) You need a POSIX-compliant shell to run this script. If your /bin/sh is
26
+ # noncompliant, but you have some other compliant shell such as ksh or
27
+ # bash, then to run this script, type that shell name before the whole
28
+ # command line, like:
29
+ #
30
+ # ksh Gradle
31
+ #
32
+ # Busybox and similar reduced shells will NOT work, because this script
33
+ # requires all of these POSIX shell features:
34
+ # * functions;
35
+ # * expansions «$var», «${var}», «${var:-default}», «${var+SET}»,
36
+ # «${var#prefix}», «${var%suffix}», and «$( cmd )»;
37
+ # * compound commands having a testable exit status, especially «case»;
38
+ # * various built-in commands including «command», «set», and «ulimit».
39
+ #
40
+ # Important for patching:
41
+ #
42
+ # (2) This script targets any POSIX shell, so it avoids extensions provided
43
+ # by Bash, Ksh, etc; in particular arrays are avoided.
44
+ #
45
+ # The "traditional" practice of packing multiple parameters into a
46
+ # space-separated string is a well documented source of bugs and security
47
+ # problems, so this is (mostly) avoided, by progressively accumulating
48
+ # options in "$@", and eventually passing that to Java.
49
+ #
50
+ # Where the inherited environment variables (DEFAULT_JVM_OPTS, JAVA_OPTS,
51
+ # and GRADLE_OPTS) rely on word-splitting, this is performed explicitly;
52
+ # see the in-line comments for details.
53
+ #
54
+ # There are tweaks for specific operating systems such as AIX, CygWin,
55
+ # Darwin, MinGW, and NonStop.
56
+ #
57
+ # (3) This script is generated from the Groovy template
58
+ # https://github.com/gradle/gradle/blob/HEAD/subprojects/plugins/src/main/resources/org/gradle/api/internal/plugins/unixStartScript.txt
59
+ # within the Gradle project.
60
+ #
61
+ # You can find Gradle at https://github.com/gradle/gradle/.
62
+ #
63
+ ##############################################################################
64
+
65
+ # Attempt to set APP_HOME
66
+
67
+ # Resolve links: $0 may be a link
68
+ app_path=$0
69
+
70
+ # Need this for daisy-chained symlinks.
71
+ while
72
+ APP_HOME=${app_path%"${app_path##*/}"} # leaves a trailing /; empty if no leading path
73
+ [ -h "$app_path" ]
74
+ do
75
+ ls=$( ls -ld "$app_path" )
76
+ link=${ls#*' -> '}
77
+ case $link in #(
78
+ /*) app_path=$link ;; #(
79
+ *) app_path=$APP_HOME$link ;;
80
+ esac
81
+ done
82
+
83
+ # This is normally unused
84
+ # shellcheck disable=SC2034
85
+ APP_BASE_NAME=${0##*/}
86
+ APP_HOME=$( cd "${APP_HOME:-./}" && pwd -P ) || exit
87
+
88
+ # Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
89
+ DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"'
90
+
91
+ # Use the maximum available, or set MAX_FD != -1 to use that value.
92
+ MAX_FD=maximum
93
+
94
+ warn () {
95
+ echo "$*"
96
+ } >&2
97
+
98
+ die () {
99
+ echo
100
+ echo "$*"
101
+ echo
102
+ exit 1
103
+ } >&2
104
+
105
+ # OS specific support (must be 'true' or 'false').
106
+ cygwin=false
107
+ msys=false
108
+ darwin=false
109
+ nonstop=false
110
+ case "$( uname )" in #(
111
+ CYGWIN* ) cygwin=true ;; #(
112
+ Darwin* ) darwin=true ;; #(
113
+ MSYS* | MINGW* ) msys=true ;; #(
114
+ NONSTOP* ) nonstop=true ;;
115
+ esac
116
+
117
+ CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar
118
+
119
+
120
+ # Determine the Java command to use to start the JVM.
121
+ if [ -n "$JAVA_HOME" ] ; then
122
+ if [ -x "$JAVA_HOME/jre/sh/java" ] ; then
123
+ # IBM's JDK on AIX uses strange locations for the executables
124
+ JAVACMD=$JAVA_HOME/jre/sh/java
125
+ else
126
+ JAVACMD=$JAVA_HOME/bin/java
127
+ fi
128
+ if [ ! -x "$JAVACMD" ] ; then
129
+ die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME
130
+
131
+ Please set the JAVA_HOME variable in your environment to match the
132
+ location of your Java installation."
133
+ fi
134
+ else
135
+ JAVACMD=java
136
+ which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
137
+
138
+ Please set the JAVA_HOME variable in your environment to match the
139
+ location of your Java installation."
140
+ fi
141
+
142
+ # Increase the maximum file descriptors if we can.
143
+ if ! "$cygwin" && ! "$darwin" && ! "$nonstop" ; then
144
+ case $MAX_FD in #(
145
+ max*)
146
+ # In POSIX sh, ulimit -H is undefined. That's why the result is checked to see if it worked.
147
+ # shellcheck disable=SC3045
148
+ MAX_FD=$( ulimit -H -n ) ||
149
+ warn "Could not query maximum file descriptor limit"
150
+ esac
151
+ case $MAX_FD in #(
152
+ '' | soft) :;; #(
153
+ *)
154
+ # In POSIX sh, ulimit -n is undefined. That's why the result is checked to see if it worked.
155
+ # shellcheck disable=SC3045
156
+ ulimit -n "$MAX_FD" ||
157
+ warn "Could not set maximum file descriptor limit to $MAX_FD"
158
+ esac
159
+ fi
160
+
161
+ # Collect all arguments for the java command, stacking in reverse order:
162
+ # * args from the command line
163
+ # * the main class name
164
+ # * -classpath
165
+ # * -D...appname settings
166
+ # * --module-path (only if needed)
167
+ # * DEFAULT_JVM_OPTS, JAVA_OPTS, and GRADLE_OPTS environment variables.
168
+
169
+ # For Cygwin or MSYS, switch paths to Windows format before running java
170
+ if "$cygwin" || "$msys" ; then
171
+ APP_HOME=$( cygpath --path --mixed "$APP_HOME" )
172
+ CLASSPATH=$( cygpath --path --mixed "$CLASSPATH" )
173
+
174
+ JAVACMD=$( cygpath --unix "$JAVACMD" )
175
+
176
+ # Now convert the arguments - kludge to limit ourselves to /bin/sh
177
+ for arg do
178
+ if
179
+ case $arg in #(
180
+ -*) false ;; # don't mess with options #(
181
+ /?*) t=${arg#/} t=/${t%%/*} # looks like a POSIX filepath
182
+ [ -e "$t" ] ;; #(
183
+ *) false ;;
184
+ esac
185
+ then
186
+ arg=$( cygpath --path --ignore --mixed "$arg" )
187
+ fi
188
+ # Roll the args list around exactly as many times as the number of
189
+ # args, so each arg winds up back in the position where it started, but
190
+ # possibly modified.
191
+ #
192
+ # NB: a `for` loop captures its iteration list before it begins, so
193
+ # changing the positional parameters here affects neither the number of
194
+ # iterations, nor the values presented in `arg`.
195
+ shift # remove old arg
196
+ set -- "$@" "$arg" # push replacement arg
197
+ done
198
+ fi
199
+
200
+ # Collect all arguments for the java command;
201
+ # * $DEFAULT_JVM_OPTS, $JAVA_OPTS, and $GRADLE_OPTS can contain fragments of
202
+ # shell script including quotes and variable substitutions, so put them in
203
+ # double quotes to make sure that they get re-expanded; and
204
+ # * put everything else in single quotes, so that it's not re-expanded.
205
+
206
+ set -- \
207
+ "-Dorg.gradle.appname=$APP_BASE_NAME" \
208
+ -classpath "$CLASSPATH" \
209
+ org.gradle.wrapper.GradleWrapperMain \
210
+ "$@"
211
+
212
+ # Stop when "xargs" is not available.
213
+ if ! command -v xargs >/dev/null 2>&1
214
+ then
215
+ die "xargs is not available"
216
+ fi
217
+
218
+ # Use "xargs" to parse quoted args.
219
+ #
220
+ # With -n1 it outputs one arg per line, with the quotes and backslashes removed.
221
+ #
222
+ # In Bash we could simply go:
223
+ #
224
+ # readarray ARGS < <( xargs -n1 <<<"$var" ) &&
225
+ # set -- "${ARGS[@]}" "$@"
226
+ #
227
+ # but POSIX shell has neither arrays nor command substitution, so instead we
228
+ # post-process each arg (as a line of input to sed) to backslash-escape any
229
+ # character that might be a shell metacharacter, then use eval to reverse
230
+ # that process (while maintaining the separation between arguments), and wrap
231
+ # the whole thing up as a single "set" statement.
232
+ #
233
+ # This will of course break if any of these variables contains a newline or
234
+ # an unmatched quote.
235
+ #
236
+
237
+ eval "set -- $(
238
+ printf '%s\n' "$DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS" |
239
+ xargs -n1 |
240
+ sed ' s~[^-[:alnum:]+,./:=@_]~\\&~g; ' |
241
+ tr '\n' ' '
242
+ )" '"$@"'
243
+
244
+ exec "$JAVACMD" "$@"
bindings/java/gradlew.bat ADDED
@@ -0,0 +1,92 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ @rem
2
+ @rem Copyright 2015 the original author or authors.
3
+ @rem
4
+ @rem Licensed under the Apache License, Version 2.0 (the "License");
5
+ @rem you may not use this file except in compliance with the License.
6
+ @rem You may obtain a copy of the License at
7
+ @rem
8
+ @rem https://www.apache.org/licenses/LICENSE-2.0
9
+ @rem
10
+ @rem Unless required by applicable law or agreed to in writing, software
11
+ @rem distributed under the License is distributed on an "AS IS" BASIS,
12
+ @rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ @rem See the License for the specific language governing permissions and
14
+ @rem limitations under the License.
15
+ @rem
16
+
17
+ @if "%DEBUG%"=="" @echo off
18
+ @rem ##########################################################################
19
+ @rem
20
+ @rem Gradle startup script for Windows
21
+ @rem
22
+ @rem ##########################################################################
23
+
24
+ @rem Set local scope for the variables with windows NT shell
25
+ if "%OS%"=="Windows_NT" setlocal
26
+
27
+ set DIRNAME=%~dp0
28
+ if "%DIRNAME%"=="" set DIRNAME=.
29
+ @rem This is normally unused
30
+ set APP_BASE_NAME=%~n0
31
+ set APP_HOME=%DIRNAME%
32
+
33
+ @rem Resolve any "." and ".." in APP_HOME to make it shorter.
34
+ for %%i in ("%APP_HOME%") do set APP_HOME=%%~fi
35
+
36
+ @rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
37
+ set DEFAULT_JVM_OPTS="-Xmx64m" "-Xms64m"
38
+
39
+ @rem Find java.exe
40
+ if defined JAVA_HOME goto findJavaFromJavaHome
41
+
42
+ set JAVA_EXE=java.exe
43
+ %JAVA_EXE% -version >NUL 2>&1
44
+ if %ERRORLEVEL% equ 0 goto execute
45
+
46
+ echo.
47
+ echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
48
+ echo.
49
+ echo Please set the JAVA_HOME variable in your environment to match the
50
+ echo location of your Java installation.
51
+
52
+ goto fail
53
+
54
+ :findJavaFromJavaHome
55
+ set JAVA_HOME=%JAVA_HOME:"=%
56
+ set JAVA_EXE=%JAVA_HOME%/bin/java.exe
57
+
58
+ if exist "%JAVA_EXE%" goto execute
59
+
60
+ echo.
61
+ echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME%
62
+ echo.
63
+ echo Please set the JAVA_HOME variable in your environment to match the
64
+ echo location of your Java installation.
65
+
66
+ goto fail
67
+
68
+ :execute
69
+ @rem Setup the command line
70
+
71
+ set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar
72
+
73
+
74
+ @rem Execute Gradle
75
+ "%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %*
76
+
77
+ :end
78
+ @rem End local scope for the variables with windows NT shell
79
+ if %ERRORLEVEL% equ 0 goto mainEnd
80
+
81
+ :fail
82
+ rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of
83
+ rem the _cmd.exe /c_ return code!
84
+ set EXIT_CODE=%ERRORLEVEL%
85
+ if %EXIT_CODE% equ 0 set EXIT_CODE=1
86
+ if not ""=="%GRADLE_EXIT_CONSOLE%" exit %EXIT_CODE%
87
+ exit /b %EXIT_CODE%
88
+
89
+ :mainEnd
90
+ if "%OS%"=="Windows_NT" endlocal
91
+
92
+ :omega
bindings/java/settings.gradle ADDED
@@ -0,0 +1 @@
 
 
1
+ rootProject.name = "whispercpp"
bindings/java/src/main/cpp/whisper_java.cpp ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #include <stdio.h>
2
+ #include "whisper_java.h"
3
+
4
// Parameters handed to whisper_full() by whisper_java_full(); (re)initialised by
// whisper_java_default_params(). Being a global, it starts out zero-initialised.
struct whisper_full_params default_params;
// Context created by whisper_java_init_from_file().
struct whisper_context * whisper_ctx = nullptr;
6
+
7
+ struct void whisper_java_default_params(enum whisper_sampling_strategy strategy) {
8
+ default_params = whisper_full_default_params(strategy);
9
+
10
+ // struct whisper_java_params result = {};
11
+ // return result;
12
+ return;
13
+ }
14
+
15
+ void whisper_java_init_from_file(const char * path_model) {
16
+ whisper_ctx = whisper_init_from_file(path_model);
17
+ if (0 == default_params.n_threads) {
18
+ whisper_java_default_params(WHISPER_SAMPLING_GREEDY);
19
+ }
20
+ }
21
+
22
+ /** Delegates to whisper_full, but without having to pass `whisper_full_params` */
23
+ int whisper_java_full(
24
+ struct whisper_context * ctx,
25
+ // struct whisper_java_params params,
26
+ const float * samples,
27
+ int n_samples) {
28
+ return whisper_full(ctx, default_params, samples, n_samples);
29
+ }
30
+
31
+ void whisper_java_free() {
32
+ // free(default_params);
33
+ }
bindings/java/src/main/cpp/whisper_java.h ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #define WHISPER_BUILD
2
+ #include <whisper.h>
3
+
4
+ #ifdef __cplusplus
5
+ extern "C" {
6
+ #endif
7
+
8
+ struct whisper_java_params {
9
+ };
10
+
11
+ WHISPER_API void whisper_java_default_params(enum whisper_sampling_strategy strategy);
12
+
13
+ WHISPER_API void whisper_java_init_from_file(const char * path_model);
14
+
15
+ WHISPER_API int whisper_java_full(
16
+ struct whisper_context * ctx,
17
+ // struct whisper_java_params params,
18
+ const float * samples,
19
+ int n_samples);
20
+
21
+
22
+ #ifdef __cplusplus
23
+ }
24
+ #endif
bindings/java/src/main/java/io/github/ggerganov/whispercpp/WhisperContext.java ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ package io.github.ggerganov.whispercpp;
2
+
3
+ import com.sun.jna.Structure;
4
+ import com.sun.jna.ptr.PointerByReference;
5
+ import io.github.ggerganov.whispercpp.ggml.GgmlType;
6
+ import io.github.ggerganov.whispercpp.WhisperModel;
7
+
8
+ import java.util.List;
9
+
10
/**
 * JNA {@link Structure} intended to mirror the native whisper context.
 * <p>
 * NOTE(review): only {@code model}, {@code vocab} and {@code state} are public and the
 * {@code getFieldOrder()} override below is commented out - confirm whether JNA can actually
 * map this class before instantiating it. {@code WhisperCpp} handles the context as an
 * opaque {@code Pointer} instead.
 */
public class WhisperContext extends Structure {
    int t_load_us = 0;
    int t_start_us = 0;

    /** weight type (FP32 / FP16 / QX) */
    GgmlType wtype = GgmlType.GGML_TYPE_F16;
    /** intermediate type (FP32 or FP16) */
    GgmlType itype = GgmlType.GGML_TYPE_F16;

    // The native members are referenced through pointers rather than mapped field-by-field:
//    WhisperModel model;
    public PointerByReference model;
//    whisper_vocab vocab;
//    whisper_state * state = nullptr;
    public PointerByReference vocab;
    public PointerByReference state;

    /** populated by whisper_init_from_file() */
    String path_model;

//    public static class ByReference extends WhisperContext implements Structure.ByReference {
//    }
//
//    public static class ByValue extends WhisperContext implements Structure.ByValue {
//    }
//
//    @Override
//    protected List<String> getFieldOrder() {
//        return List.of("t_load_us", "t_start_us", "wtype", "itype", "model", "vocab", "state", "path_model");
//    }
}
bindings/java/src/main/java/io/github/ggerganov/whispercpp/WhisperCpp.java ADDED
@@ -0,0 +1,124 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ package io.github.ggerganov.whispercpp;
2
+
3
+ import com.sun.jna.Pointer;
4
+ import io.github.ggerganov.whispercpp.params.WhisperJavaParams;
5
+ import io.github.ggerganov.whispercpp.params.WhisperSamplingStrategy;
6
+
7
+ import java.io.File;
8
+ import java.io.FileNotFoundException;
9
+ import java.io.IOException;
10
+
11
+ /**
12
+ * Before calling most methods, you must call `initContext(modelPath)` to initialise the `ctx` Pointer.
13
+ */
14
+ public class WhisperCpp implements AutoCloseable {
15
+ private WhisperCppJnaLibrary lib = WhisperCppJnaLibrary.instance;
16
+ private WhisperJavaJnaLibrary javaLib = WhisperJavaJnaLibrary.instance;
17
+ private Pointer ctx = null;
18
+
19
+ public File modelDir() {
20
+ String modelDirPath = System.getenv("XDG_CACHE_HOME");
21
+ if (modelDirPath == null) {
22
+ modelDirPath = System.getProperty("user.home") + "/.cache";
23
+ }
24
+
25
+ return new File(modelDirPath, "whisper");
26
+ }
27
+
28
+ /**
29
+ * @param modelPath - absolute path, or just the name (eg: "base", "base-en" or "base.en")
30
+ * @return a Pointer to the WhisperContext
31
+ */
32
+ void initContext(String modelPath) throws FileNotFoundException {
33
+ if (ctx != null) {
34
+ lib.whisper_free(ctx);
35
+ }
36
+
37
+ if (!modelPath.contains("/") && !modelPath.contains("\\")) {
38
+ if (!modelPath.endsWith(".bin")) {
39
+ modelPath = "ggml-" + modelPath.replace("-", ".") + ".bin";
40
+ }
41
+
42
+ modelPath = new File(modelDir(), modelPath).getAbsolutePath();
43
+ }
44
+
45
+ javaLib.whisper_java_init_from_file(modelPath);
46
+ ctx = lib.whisper_init_from_file(modelPath);
47
+
48
+ if (ctx == null) {
49
+ throw new FileNotFoundException(modelPath);
50
+ }
51
+ }
52
+
53
+ /**
54
+ * Initialises `whisper_full_params` internally in whisper_java.cpp so JNA doesn't have to map everything.
55
+ * `whisper_java_init_from_file()` calls `whisper_java_default_params(WHISPER_SAMPLING_GREEDY)` for convenience.
56
+ */
57
+ public void getDefaultJavaParams(WhisperSamplingStrategy strategy) {
58
+ javaLib.whisper_java_default_params(strategy.ordinal());
59
+ // return lib.whisper_full_default_params(strategy.value)
60
+ }
61
+
62
+ // whisper_full_default_params was too hard to integrate with, so for now we use javaLib.whisper_java_default_params
63
+ // fun getDefaultParams(strategy: WhisperSamplingStrategy): WhisperFullParams {
64
+ // return lib.whisper_full_default_params(strategy.value)
65
+ // }
66
+
67
+ @Override
68
+ public void close() {
69
+ freeContext();
70
+ System.out.println("Whisper closed");
71
+ }
72
+
73
+ private void freeContext() {
74
+ if (ctx != null) {
75
+ lib.whisper_free(ctx);
76
+ }
77
+ }
78
+
79
+ /**
80
+ * Run the entire model: PCM -> log mel spectrogram -> encoder -> decoder -> text.
81
+ * Not thread safe for same context
82
+ * Uses the specified decoding strategy to obtain the text.
83
+ */
84
+ public String fullTranscribe(/*WhisperJavaParams whisperParams,*/ float[] audioData) throws IOException {
85
+ if (ctx == null) {
86
+ throw new IllegalStateException("Model not initialised");
87
+ }
88
+
89
+ if (javaLib.whisper_java_full(ctx, /*whisperParams,*/ audioData, audioData.length) != 0) {
90
+ throw new IOException("Failed to process audio");
91
+ }
92
+
93
+ int nSegments = lib.whisper_full_n_segments(ctx);
94
+
95
+ StringBuilder str = new StringBuilder();
96
+
97
+ for (int i = 0; i < nSegments; i++) {
98
+ String text = lib.whisper_full_get_segment_text(ctx, i);
99
+ System.out.println("Segment:" + text);
100
+ str.append(text);
101
+ }
102
+
103
+ return str.toString().trim();
104
+ }
105
+
106
+ // public int getTextSegmentCount(Pointer ctx) {
107
+ // return lib.whisper_full_n_segments(ctx);
108
+ // }
109
+ // public String getTextSegment(Pointer ctx, int index) {
110
+ // return lib.whisper_full_get_segment_text(ctx, index);
111
+ // }
112
+
113
+ public String getSystemInfo() {
114
+ return lib.whisper_print_system_info();
115
+ }
116
+
117
+ public int benchMemcpy(int nthread) {
118
+ return lib.whisper_bench_memcpy(nthread);
119
+ }
120
+
121
+ public int benchGgmlMulMat(int nthread) {
122
+ return lib.whisper_bench_ggml_mul_mat(nthread);
123
+ }
124
+ }
bindings/java/src/main/java/io/github/ggerganov/whispercpp/WhisperCppJnaLibrary.java ADDED
@@ -0,0 +1,365 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ package io.github.ggerganov.whispercpp;
2
+
3
+ import com.sun.jna.Library;
4
+ import com.sun.jna.Native;
5
+ import com.sun.jna.Pointer;
6
+ import io.github.ggerganov.whispercpp.model.WhisperModelLoader;
7
+ import io.github.ggerganov.whispercpp.model.WhisperTokenData;
8
+ import io.github.ggerganov.whispercpp.params.WhisperFullParams;
9
+
10
/**
 * JNA mapping of the native "whisper" library. Each method is bound to the native function
 * of the same name; contexts and states are passed around as opaque {@link Pointer}s.
 */
public interface WhisperCppJnaLibrary extends Library {
    /** Shared binding, loaded from the native library named "whisper". */
    WhisperCppJnaLibrary instance = Native.load("whisper", WhisperCppJnaLibrary.class);

    /** @return a description of the system as reported by the native library */
    String whisper_print_system_info();

    /**
     * Allocate (almost) all memory needed for the model by loading from a file.
     *
     * @param path_model Path to the model file
     * @return Whisper context on success, null on failure
     */
    Pointer whisper_init_from_file(String path_model);

    /**
     * Allocate (almost) all memory needed for the model by loading from a buffer.
     * <p>
     * NOTE(review): confirm that {@code int} matches the width of the native size parameter
     * (see the commented-out {@code long} variant further down).
     *
     * @param buffer       Model buffer
     * @param buffer_size  Size of the model buffer
     * @return Whisper context on success, null on failure
     */
    Pointer whisper_init_from_buffer(Pointer buffer, int buffer_size);

    /**
     * Allocate (almost) all memory needed for the model using a model loader.
     *
     * @param loader Model loader
     * @return Whisper context on success, null on failure
     */
    Pointer whisper_init(WhisperModelLoader loader);

    /**
     * Allocate (almost) all memory needed for the model by loading from a file without allocating the state.
     *
     * @param path_model Path to the model file
     * @return Whisper context on success, null on failure
     */
    Pointer whisper_init_from_file_no_state(String path_model);

    /**
     * Allocate (almost) all memory needed for the model by loading from a buffer without allocating the state.
     *
     * @param buffer       Model buffer
     * @param buffer_size  Size of the model buffer
     * @return Whisper context on success, null on failure
     */
    Pointer whisper_init_from_buffer_no_state(Pointer buffer, int buffer_size);

//    Pointer whisper_init_from_buffer_no_state(Pointer buffer, long buffer_size);

    /**
     * Allocate (almost) all memory needed for the model using a model loader without allocating the state.
     *
     * @param loader Model loader
     * @return Whisper context on success, null on failure
     */
    Pointer whisper_init_no_state(WhisperModelLoader loader);

    /**
     * Allocate memory for the Whisper state.
     *
     * @param ctx Whisper context
     * @return Whisper state on success, null on failure
     */
    Pointer whisper_init_state(Pointer ctx);

    /**
     * Free all allocated memory associated with the Whisper context.
     *
     * @param ctx Whisper context
     */
    void whisper_free(Pointer ctx);

    /**
     * Free all allocated memory associated with the Whisper state.
     *
     * @param state Whisper state
     */
    void whisper_free_state(Pointer state);


    /**
     * Convert RAW PCM audio to log mel spectrogram.
     * The resulting spectrogram is stored inside the default state of the provided whisper context.
     *
     * @param ctx        Pointer to a WhisperContext
     * @param samples    PCM samples
     * @param n_samples  Number of samples
     * @param n_threads  Number of threads to use
     * @return 0 on success
     */
    int whisper_pcm_to_mel(Pointer ctx, final float[] samples, int n_samples, int n_threads);

    /**
     * Same as {@link #whisper_pcm_to_mel} but takes the state explicitly.
     *
     * @param ctx        Pointer to a WhisperContext
     * @param state      Pointer to WhisperState
     * @param samples    PCM samples
     * @param n_samples  Number of samples
     * @param n_threads  Number of threads to use
     * @return 0 on success
     */
    int whisper_pcm_to_mel_with_state(Pointer ctx, Pointer state, final float[] samples, int n_samples, int n_threads);

    /**
     * This can be used to set a custom log mel spectrogram inside the default state of the provided whisper context.
     * Use this instead of whisper_pcm_to_mel() if you want to provide your own log mel spectrogram.
     * n_mel must be 80
     * @return 0 on success
     */
    int whisper_set_mel(Pointer ctx, final float[] data, int n_len, int n_mel);
    /** Same as {@link #whisper_set_mel} but takes the state explicitly. */
    int whisper_set_mel_with_state(Pointer ctx, Pointer state, final float[] data, int n_len, int n_mel);

    /**
     * Run the Whisper encoder on the log mel spectrogram stored inside the default state in the provided whisper context.
     * Make sure to call whisper_pcm_to_mel() or whisper_set_mel() first.
     * Offset can be used to specify the offset of the first frame in the spectrogram.
     * @return 0 on success
     */
    int whisper_encode(Pointer ctx, int offset, int n_threads);

    /** Same as {@link #whisper_encode} but takes the state explicitly. */
    int whisper_encode_with_state(Pointer ctx, Pointer state, int offset, int n_threads);

    /**
     * Run the Whisper decoder to obtain the logits and probabilities for the next token.
     * Make sure to call whisper_encode() first.
     * tokens + n_tokens is the provided context for the decoder.
     * n_past is the number of tokens to use from previous decoder calls.
     * Returns 0 on success
     * TODO: add support for multiple decoders
     */
    int whisper_decode(Pointer ctx, Pointer tokens, int n_tokens, int n_past, int n_threads);

    /**
     * Same as {@link #whisper_decode} but takes the state explicitly.
     *
     * @param ctx        Pointer to a WhisperContext
     * @param state      Pointer to WhisperState
     * @param tokens     Pointer to int tokens
     * @param n_tokens   Number of tokens pointed to by {@code tokens}
     * @param n_past     Number of tokens to use from previous decoder calls
     * @param n_threads  Number of threads to use
     * @return the native status code (presumably 0 on success, as for whisper_decode - TODO confirm)
     */
    int whisper_decode_with_state(Pointer ctx, Pointer state, Pointer tokens, int n_tokens, int n_past, int n_threads);

    /**
     * Convert the provided text into tokens.
     * The tokens pointer must be large enough to hold the resulting tokens.
     * Returns the number of tokens on success, no more than n_max_tokens
     * Returns -1 on failure
     * TODO: not sure if correct
     */
    int whisper_tokenize(Pointer ctx, String text, Pointer tokens, int n_max_tokens);

    /** Largest language id (i.e. number of available languages - 1) */
    int whisper_lang_max_id();

    /**
     * @return the id of the specified language, returns -1 if not found.
     * Examples:
     *   "de" -> 2
     *   "german" -> 2
     */
    int whisper_lang_id(String lang);

    /** @return the short string of the specified language id (e.g. 2 -> "de"), returns null if not found */
    String whisper_lang_str(int id);

    /**
     * Use mel data at offset_ms to try and auto-detect the spoken language.
     * Make sure to call whisper_pcm_to_mel() or whisper_set_mel() first
     * Returns the top language id or negative on failure
     * If not null, fills the lang_probs array with the probabilities of all languages
     * The array must be whisper_lang_max_id() + 1 in size
     *
     * ref: https://github.com/openai/whisper/blob/main/whisper/decoding.py#L18-L69
     */
    int whisper_lang_auto_detect(Pointer ctx, int offset_ms, int n_threads, float[] lang_probs);

    /** Same as {@link #whisper_lang_auto_detect} but takes the state explicitly. */
    int whisper_lang_auto_detect_with_state(Pointer ctx, Pointer state, int offset_ms, int n_threads, float[] lang_probs);

    int whisper_n_len (Pointer ctx); // mel length
    int whisper_n_len_from_state(Pointer state); // mel length
    int whisper_n_vocab (Pointer ctx);
    int whisper_n_text_ctx (Pointer ctx);
    int whisper_n_audio_ctx (Pointer ctx);
    int whisper_is_multilingual (Pointer ctx);

    // Accessors for properties of the loaded model.
    int whisper_model_n_vocab (Pointer ctx);
    int whisper_model_n_audio_ctx (Pointer ctx);
    int whisper_model_n_audio_state(Pointer ctx);
    int whisper_model_n_audio_head (Pointer ctx);
    int whisper_model_n_audio_layer(Pointer ctx);
    int whisper_model_n_text_ctx (Pointer ctx);
    int whisper_model_n_text_state (Pointer ctx);
    int whisper_model_n_text_head (Pointer ctx);
    int whisper_model_n_text_layer (Pointer ctx);
    int whisper_model_n_mels (Pointer ctx);
    int whisper_model_ftype (Pointer ctx);
    int whisper_model_type (Pointer ctx);

    /**
     * Token logits obtained from the last call to whisper_decode().
     * The logits for the last token are stored in the last row
     * Rows: n_tokens
     * Cols: n_vocab
     * <p>
     * NOTE(review): confirm that JNA can marshal a {@code float[]} return value; a native
     * pointer result may need to be mapped as {@link Pointer} instead - TODO verify.
     */
    float[] whisper_get_logits (Pointer ctx);
    float[] whisper_get_logits_from_state(Pointer state);

    // Token Id -> String. Uses the vocabulary in the provided context
    String whisper_token_to_str(Pointer ctx, int token);
    String whisper_model_type_readable(Pointer ctx);

    // Special tokens
    int whisper_token_eot (Pointer ctx);
    int whisper_token_sot (Pointer ctx);
    int whisper_token_prev(Pointer ctx);
    int whisper_token_solm(Pointer ctx);
    int whisper_token_not (Pointer ctx);
    int whisper_token_beg (Pointer ctx);
    int whisper_token_lang(Pointer ctx, int lang_id);

    // Task tokens
    int whisper_token_translate();
    int whisper_token_transcribe();

    // Performance information from the default state.
    void whisper_print_timings(Pointer ctx);
    void whisper_reset_timings(Pointer ctx);

    /**
     * @param strategy - WhisperSamplingStrategy.value
     */
    WhisperFullParams whisper_full_default_params(int strategy);

    /**
     * Run the entire model: PCM -> log mel spectrogram -> encoder -> decoder -> text
     * Not thread safe for same context
     * Uses the specified decoding strategy to obtain the text.
     */
    int whisper_full(Pointer ctx, WhisperFullParams params, final float[] samples, int n_samples);

    /** Same as {@link #whisper_full} but takes the state explicitly. */
    int whisper_full_with_state(Pointer ctx, Pointer state, WhisperFullParams params, final float[] samples, int n_samples);

    // Split the input audio in chunks and process each chunk separately using whisper_full_with_state()
    // Result is stored in the default state of the context
    // Not thread safe if executed in parallel on the same context.
    // It seems this approach can offer some speedup in some cases.
    // However, the transcription accuracy can be worse at the beginning and end of each chunk.
    int whisper_full_parallel(Pointer ctx, WhisperFullParams params, final float[] samples, int n_samples, int n_processors);

    /**
     * Number of generated text segments.
     * A segment can be a few words, a sentence, or even a paragraph.
     * @param ctx Pointer to WhisperContext
     */
    int whisper_full_n_segments (Pointer ctx);

    /**
     * Number of generated text segments, read from the given state.
     * @param state Pointer to WhisperState
     */
    int whisper_full_n_segments_from_state(Pointer state);

    /**
     * Language id associated with the context's default state.
     * @param ctx Pointer to WhisperContext
     */
    int whisper_full_lang_id(Pointer ctx);

    /** Language id associated with the provided state */
    int whisper_full_lang_id_from_state(Pointer state);

    /**
     * Convert RAW PCM audio to log mel spectrogram but applies a Phase Vocoder to speed up the audio x2.
     * The resulting spectrogram is stored inside the default state of the provided whisper context.
     * @return 0 on success
     */
    int whisper_pcm_to_mel_phase_vocoder(Pointer ctx, final float[] samples, int n_samples, int n_threads);

    /** Same as {@link #whisper_pcm_to_mel_phase_vocoder} but takes the state explicitly. */
    int whisper_pcm_to_mel_phase_vocoder_with_state(Pointer ctx, Pointer state, final float[] samples, int n_samples, int n_threads);

    /** Get the start time of the specified segment. */
    long whisper_full_get_segment_t0(Pointer ctx, int i_segment);

    /** Get the start time of the specified segment from the state. */
    long whisper_full_get_segment_t0_from_state(Pointer state, int i_segment);

    /** Get the end time of the specified segment. */
    long whisper_full_get_segment_t1(Pointer ctx, int i_segment);

    /** Get the end time of the specified segment from the state. */
    long whisper_full_get_segment_t1_from_state(Pointer state, int i_segment);

    /** Get the text of the specified segment. */
    String whisper_full_get_segment_text(Pointer ctx, int i_segment);

    /** Get the text of the specified segment from the state. */
    String whisper_full_get_segment_text_from_state(Pointer state, int i_segment);

    /** Get the number of tokens in the specified segment. */
    int whisper_full_n_tokens(Pointer ctx, int i_segment);

    /** Get the number of tokens in the specified segment from the state. */
    int whisper_full_n_tokens_from_state(Pointer state, int i_segment);

    /** Get the token text of the specified token in the specified segment. */
    String whisper_full_get_token_text(Pointer ctx, int i_segment, int i_token);


    /** Get the token text of the specified token in the specified segment from the state. */
    String whisper_full_get_token_text_from_state(Pointer ctx, Pointer state, int i_segment, int i_token);

    /** Get the token ID of the specified token in the specified segment. */
    int whisper_full_get_token_id(Pointer ctx, int i_segment, int i_token);

    /** Get the token ID of the specified token in the specified segment from the state. */
    int whisper_full_get_token_id_from_state(Pointer state, int i_segment, int i_token);

    /** Get token data for the specified token in the specified segment. */
    WhisperTokenData whisper_full_get_token_data(Pointer ctx, int i_segment, int i_token);

    /** Get token data for the specified token in the specified segment from the state. */
    WhisperTokenData whisper_full_get_token_data_from_state(Pointer state, int i_segment, int i_token);

    /** Get the probability of the specified token in the specified segment. */
    float whisper_full_get_token_p(Pointer ctx, int i_segment, int i_token);

    /** Get the probability of the specified token in the specified segment from the state. */
    float whisper_full_get_token_p_from_state(Pointer state, int i_segment, int i_token);

    /**
     * Benchmark function for memcpy.
     *
     * @param nThreads Number of threads to use for the benchmark.
     * @return The result of the benchmark.
     */
    int whisper_bench_memcpy(int nThreads);

    /**
     * Benchmark function for memcpy as a string.
     *
     * @param nThreads Number of threads to use for the benchmark.
     * @return The result of the benchmark as a string.
     */
    String whisper_bench_memcpy_str(int nThreads);

    /**
     * Benchmark function for ggml_mul_mat.
     *
     * @param nThreads Number of threads to use for the benchmark.
     * @return The result of the benchmark.
     */
    int whisper_bench_ggml_mul_mat(int nThreads);

    /**
     * Benchmark function for ggml_mul_mat as a string.
     *
     * @param nThreads Number of threads to use for the benchmark.
     * @return The result of the benchmark as a string.
     */
    String whisper_bench_ggml_mul_mat_str(int nThreads);
}
bindings/java/src/main/java/io/github/ggerganov/whispercpp/WhisperJavaJnaLibrary.java ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ package io.github.ggerganov.whispercpp;
2
+
3
+ import com.sun.jna.Library;
4
+ import com.sun.jna.Native;
5
+ import com.sun.jna.Pointer;
6
+ import io.github.ggerganov.whispercpp.params.WhisperJavaParams;
7
+
8
/**
 * JNA mapping of the native "whisper_java" helper library, which keeps the
 * {@code whisper_full_params} on the native side so they need not be mapped in Java.
 */
interface WhisperJavaJnaLibrary extends Library {
    /** Shared binding, loaded from the native library named "whisper_java". */
    WhisperJavaJnaLibrary instance = Native.load("whisper_java", WhisperJavaJnaLibrary.class);

    /**
     * (Re)initialises the natively held parameters.
     *
     * @param strategy numeric value of the sampling strategy
     */
    void whisper_java_default_params(int strategy);

    /**
     * Releases resources held by the helper library.
     * NOTE(review): confirm the native library actually exports this symbol before calling it.
     */
    void whisper_java_free();

    /**
     * Loads the model on the native side.
     *
     * @param modelPath path to the model file
     */
    void whisper_java_init_from_file(String modelPath);

    /**
     * Run the entire model: PCM -> log mel spectrogram -> encoder -> decoder -> text.
     * Not thread safe for same context
     * Uses the specified decoding strategy to obtain the text.
     *
     * @param ctx      pointer to the whisper context
     * @param samples  audio samples
     * @param nSamples number of samples
     * @return the native status code; callers treat 0 as success
     */
    int whisper_java_full(Pointer ctx, /*WhisperJavaParams params, */float[] samples, int nSamples);
}
bindings/java/src/main/java/io/github/ggerganov/whispercpp/callbacks/WhisperEncoderBeginCallback.java ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ package io.github.ggerganov.whispercpp.callbacks;
2
+
3
+ import com.sun.jna.Callback;
4
+ import com.sun.jna.Pointer;
5
+ import io.github.ggerganov.whispercpp.WhisperContext;
6
+ import io.github.ggerganov.whispercpp.model.WhisperState;
7
+
8
/**
 * JNA callback invoked before the encoder starts.
 * If not null, called before the encoder starts.
 * If it returns false, the computation is aborted.
 */
public interface WhisperEncoderBeginCallback extends Callback {

    /**
     * Callback method before the encoder starts.
     *
     * @param ctx        The whisper context.
     * @param state      The whisper state.
     * @param user_data  User data.
     * @return True if the computation should proceed, false otherwise.
     */
    boolean callback(WhisperContext ctx, WhisperState state, Pointer user_data);
}
bindings/java/src/main/java/io/github/ggerganov/whispercpp/callbacks/WhisperLogitsFilterCallback.java ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ package io.github.ggerganov.whispercpp.callbacks;
2
+
3
+ import com.sun.jna.Pointer;
4
+ import io.github.ggerganov.whispercpp.WhisperContext;
5
+ import io.github.ggerganov.whispercpp.model.WhisperState;
6
+ import io.github.ggerganov.whispercpp.model.WhisperTokenData;
7
+
8
+ import javax.security.auth.callback.Callback;
9
+
10
+ /**
11
+ * Callback to filter logits.
12
+ * Can be used to modify the logits before sampling.
13
+ * If not null, called after applying temperature to logits.
14
+ */
15
+ public interface WhisperLogitsFilterCallback extends Callback {
16
+
17
+ /**
18
+ * Callback method to filter logits.
19
+ *
20
+ * @param ctx The whisper context.
21
+ * @param state The whisper state.
22
+ * @param tokens The array of whisper_token_data.
23
+ * @param n_tokens The number of tokens.
24
+ * @param logits The array of logits.
25
+ * @param user_data User data.
26
+ */
27
+ void callback(WhisperContext ctx, WhisperState state, WhisperTokenData[] tokens, int n_tokens, float[] logits, Pointer user_data);
28
+ }
bindings/java/src/main/java/io/github/ggerganov/whispercpp/callbacks/WhisperNewSegmentCallback.java ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ package io.github.ggerganov.whispercpp.callbacks;
2
+
3
+ import com.sun.jna.Callback;
4
+ import com.sun.jna.Pointer;
5
+ import io.github.ggerganov.whispercpp.WhisperContext;
6
+ import io.github.ggerganov.whispercpp.model.WhisperState;
7
+
8
/**
 * JNA callback for the text segment.
 * Called on every newly generated text segment.
 * Use the whisper_full_...() functions to obtain the text segments.
 */
public interface WhisperNewSegmentCallback extends Callback {

    /**
     * Callback method for the text segment.
     *
     * @param ctx        The whisper context.
     * @param state      The whisper state.
     * @param n_new      The number of newly generated text segments.
     * @param user_data  User data.
     */
    void callback(WhisperContext ctx, WhisperState state, int n_new, Pointer user_data);
}
bindings/java/src/main/java/io/github/ggerganov/whispercpp/callbacks/WhisperProgressCallback.java ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ package io.github.ggerganov.whispercpp.callbacks;
2
+
3
+ import com.sun.jna.Pointer;
4
+ import io.github.ggerganov.whispercpp.WhisperContext;
5
+ import io.github.ggerganov.whispercpp.model.WhisperState;
6
+
7
+ import javax.security.auth.callback.Callback;
8
+
9
+ /**
10
+ * Callback for progress updates.
11
+ */
12
+ public interface WhisperProgressCallback extends Callback {
13
+
14
+ /**
15
+ * Callback method for progress updates.
16
+ *
17
+ * @param ctx The whisper context.
18
+ * @param state The whisper state.
19
+ * @param progress The progress value.
20
+ * @param user_data User data.
21
+ */
22
+ void callback(WhisperContext ctx, WhisperState state, int progress, Pointer user_data);
23
+ }
bindings/java/src/main/java/io/github/ggerganov/whispercpp/ggml/GgmlTensor.java ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ package io.github.ggerganov.whispercpp.ggml;
2
+
3
/**
 * Placeholder type for a ggml tensor.
 * No fields are mapped yet; the class only exists so other types can refer to it.
 */
public class GgmlTensor {
}
bindings/java/src/main/java/io/github/ggerganov/whispercpp/ggml/GgmlType.java ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ package io.github.ggerganov.whispercpp.ggml;
2
+
3
/**
 * Tensor data types.
 * <p>
 * The declaration order fixes each constant's ordinal, so entries must not be reordered;
 * the two {@code REMOVED_*} constants keep the slots of types that are no longer supported.
 * NOTE(review): presumably mirrors ggml's native {@code ggml_type} enum - confirm against ggml.h.
 */
public enum GgmlType {
    GGML_TYPE_F32,
    GGML_TYPE_F16,
    GGML_TYPE_Q4_0,
    GGML_TYPE_Q4_1,
    /** Support has been removed. */
    REMOVED_GGML_TYPE_Q4_2,
    /** Support has been removed. */
    REMOVED_GGML_TYPE_Q4_3,
    GGML_TYPE_Q5_0,
    GGML_TYPE_Q5_1,
    GGML_TYPE_Q8_0,
    GGML_TYPE_Q8_1,
    GGML_TYPE_I8,
    GGML_TYPE_I16,
    GGML_TYPE_I32,
    GGML_TYPE_COUNT
}
bindings/java/src/main/java/io/github/ggerganov/whispercpp/model/EModel.java ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ package io.github.ggerganov.whispercpp.model;
2
+
3
/**
 * Known model sizes, from unknown through large.
 * The declaration order fixes each constant's ordinal, so entries must not be reordered.
 */
public enum EModel {
    MODEL_UNKNOWN,
    MODEL_TINY,
    MODEL_BASE,
    MODEL_SMALL,
    MODEL_MEDIUM,
    MODEL_LARGE
}
bindings/java/src/main/java/io/github/ggerganov/whispercpp/model/WhisperModel.java ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ package io.github.ggerganov.whispercpp;
2
+
3
+ import io.github.ggerganov.whispercpp.ggml.GgmlTensor;
4
+ import io.github.ggerganov.whispercpp.model.EModel;
5
+
6
/**
 * Placeholder for the whisper model description; no members are mapped yet.
 * The commented-out declarations below sketch the intended contents and are kept for reference.
 * <p>
 * NOTE(review): this file sits under the {@code model/} directory but declares the package
 * {@code io.github.ggerganov.whispercpp} - confirm which one is intended.
 */
public class WhisperModel {
    // EModel type = EModel.MODEL_UNKNOWN;
    //
    // WhisperHParams hparams;
    // WhisperFilters filters;
    //
    // // encoder.positional_embedding
    // GgmlTensor e_pe;
    //
    // // encoder.conv1
    // GgmlTensor e_conv_1_w;
    // GgmlTensor e_conv_1_b;
    //
    // // encoder.conv2
    // GgmlTensor e_conv_2_w;
    // GgmlTensor e_conv_2_b;
    //
    // // encoder.ln_post
    // GgmlTensor e_ln_w;
    // GgmlTensor e_ln_b;
    //
    // // decoder.positional_embedding
    // GgmlTensor d_pe;
    //
    // // decoder.token_embedding
    // GgmlTensor d_te;
    //
    // // decoder.ln
    // GgmlTensor d_ln_w;
    // GgmlTensor d_ln_b;
    //
    // std::vector<whisper_layer_encoder> layers_encoder;
    // std::vector<whisper_layer_decoder> layers_decoder;
    //
    // // context
    // struct ggml_context * ctx;
    //
    // // the model memory buffer is read-only and can be shared between processors
    // std::vector<uint8_t> * buf;
    //
    // // tensors
    // int n_loaded;
    // Map<String, GgmlTensor> tensors;
}
bindings/java/src/main/java/io/github/ggerganov/whispercpp/model/WhisperModelLoader.java ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ package io.github.ggerganov.whispercpp.model;
2
+
3
+ import com.sun.jna.Callback;
4
+ import com.sun.jna.Pointer;
5
+ import com.sun.jna.Structure;
6
+
7
+
8
+ public class WhisperModelLoader extends Structure {
9
+ public Pointer context;
10
+ public ReadFunction read;
11
+ public EOFFunction eof;
12
+ public CloseFunction close;
13
+
14
+ public static class ReadFunction implements Callback {
15
+ public Pointer invoke(Pointer ctx, Pointer output, int readSize) {
16
+ // TODO
17
+ return ctx;
18
+ }
19
+ }
20
+
21
+ public static class EOFFunction implements Callback {
22
+ public boolean invoke(Pointer ctx) {
23
+ // TODO
24
+ return false;
25
+ }
26
+ }
27
+
28
+ public static class CloseFunction implements Callback {
29
+ public void invoke(Pointer ctx) {
30
+ // TODO
31
+ }
32
+ }
33
+
34
+ // public WhisperModelLoader(Pointer p) {
35
+ // super(p);
36
+ // read = new ReadFunction();
37
+ // eof = new EOFFunction();
38
+ // close = new CloseFunction();
39
+ // read.setCallback(this);
40
+ // eof.setCallback(this);
41
+ // close.setCallback(this);
42
+ // read.write();
43
+ // eof.write();
44
+ // close.write();
45
+ // }
46
+
47
+ public WhisperModelLoader() {
48
+ super();
49
+ }
50
+
51
+ public interface ReadCallback extends Callback {
52
+ Pointer invoke(Pointer ctx, Pointer output, int readSize);
53
+ }
54
+
55
+ public interface EOFCallback extends Callback {
56
+ boolean invoke(Pointer ctx);
57
+ }
58
+
59
+ public interface CloseCallback extends Callback {
60
+ void invoke(Pointer ctx);
61
+ }
62
+ }
bindings/java/src/main/java/io/github/ggerganov/whispercpp/model/WhisperState.java ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ package io.github.ggerganov.whispercpp.model;
2
+
3
/**
 * Placeholder type for the whisper state passed to callbacks.
 * No fields are mapped yet.
 */
public class WhisperState {
}
bindings/java/src/main/java/io/github/ggerganov/whispercpp/model/WhisperTokenData.java ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ package io.github.ggerganov.whispercpp.model;
2
+
3
+ import com.sun.jna.Structure;
4
+
5
+ import java.util.Arrays;
6
+ import java.util.List;
7
+
8
+ /**
9
+ * Structure representing token data.
10
+ */
11
+ public class WhisperTokenData extends Structure {
12
+
13
+ /** Token ID. */
14
+ public int id;
15
+
16
+ /** Forced timestamp token ID. */
17
+ public int tid;
18
+
19
+ /** Probability of the token. */
20
+ public float p;
21
+
22
+ /** Log probability of the token. */
23
+ public float plog;
24
+
25
+ /** Probability of the timestamp token. */
26
+ public float pt;
27
+
28
+ /** Sum of probabilities of all timestamp tokens. */
29
+ public float ptsum;
30
+
31
+ /**
32
+ * Start time of the token (token-level timestamp data).
33
+ * Do not use if you haven't computed token-level timestamps.
34
+ */
35
+ public long t0;
36
+
37
+ /**
38
+ * End time of the token (token-level timestamp data).
39
+ * Do not use if you haven't computed token-level timestamps.
40
+ */
41
+ public long t1;
42
+
43
+ /** Voice length of the token. */
44
+ public float vlen;
45
+
46
+ @Override
47
+ protected List<String> getFieldOrder() {
48
+ return Arrays.asList("id", "tid", "p", "plog", "pt", "ptsum", "t0", "t1", "vlen");
49
+ }
50
+ }
bindings/java/src/main/java/io/github/ggerganov/whispercpp/params/WhisperFilters.java ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ package io.github.ggerganov.whispercpp.params;
2
+
3
+ import java.util.List;
4
+
5
/**
 * Plain holder for filter settings: two integer sizes and a list of float values.
 * All members are package-private and start at their Java defaults.
 * NOTE(review): presumably mirrors whisper.cpp's native filters struct - confirm.
 */
public class WhisperFilters {
    int n_mel;
    int n_fft;

    /** Filter values; {@code null} until assigned. */
    List<Float> data;
}
bindings/java/src/main/java/io/github/ggerganov/whispercpp/params/WhisperFullParams.java ADDED
@@ -0,0 +1,187 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ package io.github.ggerganov.whispercpp.params;
2
+
3
+ import com.sun.jna.Callback;
4
+ import com.sun.jna.Pointer;
5
+ import com.sun.jna.Structure;
6
+ import io.github.ggerganov.whispercpp.callbacks.WhisperEncoderBeginCallback;
7
+ import io.github.ggerganov.whispercpp.callbacks.WhisperLogitsFilterCallback;
8
+ import io.github.ggerganov.whispercpp.callbacks.WhisperNewSegmentCallback;
9
+ import io.github.ggerganov.whispercpp.callbacks.WhisperProgressCallback;
10
+
11
+ /**
12
+ * Parameters for the whisper_full() function.
13
+ * If you change the order or add new parameters, make sure to update the default values in whisper.cpp:
14
+ * whisper_full_default_params()
15
+ */
16
+ public class WhisperFullParams extends Structure {
17
+
18
+ /** Sampling strategy for whisper_full() function. */
19
+ public int strategy;
20
+
21
+ /** Number of threads. */
22
+ public int n_threads;
23
+
24
+ /** Maximum tokens to use from past text as a prompt for the decoder. */
25
+ public int n_max_text_ctx;
26
+
27
+ /** Start offset in milliseconds. */
28
+ public int offset_ms;
29
+
30
+ /** Audio duration to process in milliseconds. */
31
+ public int duration_ms;
32
+
33
+ /** Translate flag. */
34
+ public boolean translate;
35
+
36
+ /** Flag to indicate whether to use past transcription (if any) as an initial prompt for the decoder. */
37
+ public boolean no_context;
38
+
39
+ /** Flag to force single segment output (useful for streaming). */
40
+ public boolean single_segment;
41
+
42
+ /** Flag to print special tokens (e.g., &lt;SOT>, &lt;EOT>, &lt;BEG>, etc.). */
43
+ public boolean print_special;
44
+
45
+ /** Flag to print progress information. */
46
+ public boolean print_progress;
47
+
48
+ /** Flag to print results from within whisper.cpp (avoid it, use callback instead). */
49
+ public boolean print_realtime;
50
+
51
+ /** Flag to print timestamps for each text segment when printing realtime. */
52
+ public boolean print_timestamps;
53
+
54
+ /** [EXPERIMENTAL] Flag to enable token-level timestamps. */
55
+ public boolean token_timestamps;
56
+
57
+ /** [EXPERIMENTAL] Timestamp token probability threshold (~0.01). */
58
+ public float thold_pt;
59
+
60
+ /** [EXPERIMENTAL] Timestamp token sum probability threshold (~0.01). */
61
+ public float thold_ptsum;
62
+
63
+ /** Maximum segment length in characters. */
64
+ public int max_len;
65
+
66
+ /** Flag to split on word rather than on token (when used with max_len). */
67
+ public boolean split_on_word;
68
+
69
+ /** Maximum tokens per segment (0 = no limit). */
70
+ public int max_tokens;
71
+
72
+ /** Flag to speed up the audio by 2x using Phase Vocoder. */
73
+ public boolean speed_up;
74
+
75
+ /** Overwrite the audio context size (0 = use default). */
76
+ public int audio_ctx;
77
+
78
+ /** Tokens to provide to the whisper decoder as an initial prompt.
79
+ * These are prepended to any existing text context from a previous call. */
80
+ public String initial_prompt;
81
+
82
+ /** Prompt tokens. */
83
+ public Pointer prompt_tokens;
84
+
85
+ /** Number of prompt tokens. */
86
+ public int prompt_n_tokens;
87
+
88
+ /** Language for auto-detection.
89
+ * For auto-detection, set to `null`, `""`, or "auto". */
90
+ public String language;
91
+
92
+ /** Flag to indicate whether to detect language automatically. */
93
+ public boolean detect_language;
94
+
95
+ /** Common decoding parameters. */
96
+
97
+ /** Flag to suppress blank tokens. */
98
+ public boolean suppress_blank;
99
+
100
+ /** Flag to suppress non-speech tokens. */
101
+ public boolean suppress_non_speech_tokens;
102
+
103
+ /** Initial decoding temperature. */
104
+ public float temperature;
105
+
106
+ /** Maximum initial timestamp. */
107
+ public float max_initial_ts;
108
+
109
+ /** Length penalty. */
110
+ public float length_penalty;
111
+
112
+ /** Fallback parameters. */
113
+
114
+ /** Temperature increment. */
115
+ public float temperature_inc;
116
+
117
+ /** Entropy threshold (similar to OpenAI's "compression_ratio_threshold"). */
118
+ public float entropy_thold;
119
+
120
+ /** Log probability threshold. */
121
+ public float logprob_thold;
122
+
123
+ /** No speech threshold. */
124
+ public float no_speech_thold;
125
+
126
+ class GreedyParams extends Structure {
127
+ /** https://github.com/openai/whisper/blob/f82bc59f5ea234d4b97fb2860842ed38519f7e65/whisper/transcribe.py#L264 */
128
+ public int best_of;
129
+ }
130
+
131
+ /** Greedy decoding parameters. */
132
+ public GreedyParams greedy;
133
+
134
+ class BeamSearchParams extends Structure {
135
+ /** ref: https://github.com/openai/whisper/blob/f82bc59f5ea234d4b97fb2860842ed38519f7e65/whisper/transcribe.py#L265 */
136
+ int beam_size;
137
+
138
+ /** ref: https://arxiv.org/pdf/2204.05424.pdf */
139
+ float patience;
140
+ }
141
+
142
+ /**
143
+ * Beam search decoding parameters.
144
+ */
145
+ public BeamSearchParams beam_search;
146
+
147
+ /**
148
+ * Callback for every newly generated text segment.
149
+ */
150
+ public WhisperNewSegmentCallback new_segment_callback;
151
+
152
+ /**
153
+ * User data for the new_segment_callback.
154
+ */
155
+ public Pointer new_segment_callback_user_data;
156
+
157
+ /**
158
+ * Callback on each progress update.
159
+ */
160
+ public WhisperProgressCallback progress_callback;
161
+
162
+ /**
163
+ * User data for the progress_callback.
164
+ */
165
+ public Pointer progress_callback_user_data;
166
+
167
+ /**
168
+ * Callback each time before the encoder starts.
169
+ */
170
+ public WhisperEncoderBeginCallback encoder_begin_callback;
171
+
172
+ /**
173
+ * User data for the encoder_begin_callback.
174
+ */
175
+ public Pointer encoder_begin_callback_user_data;
176
+
177
+ /**
178
+ * Callback by each decoder to filter obtained logits.
179
+ */
180
+ public WhisperLogitsFilterCallback logits_filter_callback;
181
+
182
+ /**
183
+ * User data for the logits_filter_callback.
184
+ */
185
+ public Pointer logits_filter_callback_user_data;
186
+ }
187
+
bindings/java/src/main/java/io/github/ggerganov/whispercpp/params/WhisperHParams.java ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ package io.github.ggerganov.whispercpp.params;
2
+
3
/**
 * Model hyper-parameters with their default values.
 * All members are package-private and mutable.
 */
public class WhisperHParams {
    // vocabulary
    int n_vocab = 51864;

    // audio side
    int n_audio_ctx = 1500;
    int n_audio_state = 384;
    int n_audio_head = 6;
    int n_audio_layer = 4;

    // text side
    int n_text_ctx = 448;
    int n_text_state = 384;
    int n_text_head = 6;
    int n_text_layer = 4;

    int n_mels = 80;
    int ftype = 1;
}
bindings/java/src/main/java/io/github/ggerganov/whispercpp/params/WhisperJavaParams.java ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ package io.github.ggerganov.whispercpp.params;
2
+
3
+ import com.sun.jna.Structure;
4
+
5
+ public class WhisperJavaParams extends Structure {
6
+
7
+ }
bindings/java/src/main/java/io/github/ggerganov/whispercpp/params/WhisperSamplingStrategy.java ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ package io.github.ggerganov.whispercpp.params;
2
+
3
/**
 * Available sampling strategies.
 * The declaration order fixes each constant's ordinal, so entries must not be reordered.
 */
public enum WhisperSamplingStrategy {

    /** Similar to OpenAI's GreedyDecoder. */
    WHISPER_SAMPLING_GREEDY,

    /** Similar to OpenAI's BeamSearchDecoder. */
    WHISPER_SAMPLING_BEAM_SEARCH
}
bindings/java/src/test/java/io/github/ggerganov/whispercpp/WhisperCppTest.java ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ package io.github.ggerganov.whispercpp;
2
+
3
+ import static org.junit.jupiter.api.Assertions.*;
4
+
5
+ import io.github.ggerganov.whispercpp.params.WhisperJavaParams;
6
+ import io.github.ggerganov.whispercpp.params.WhisperSamplingStrategy;
7
+ import org.junit.jupiter.api.BeforeAll;
8
+ import org.junit.jupiter.api.Test;
9
+ import javax.sound.sampled.AudioInputStream;
10
+ import javax.sound.sampled.AudioSystem;
11
+ import java.io.File;
12
+ import java.io.FileNotFoundException;
13
+
14
+ class WhisperCppTest {
15
+ private static WhisperCpp whisper = new WhisperCpp();
16
+ private static boolean modelInitialised = false;
17
+
18
+ @BeforeAll
19
+ static void init() throws FileNotFoundException {
20
+ // By default, models are loaded from ~/.cache/whisper/ and are usually named "ggml-${name}.bin"
21
+ // or you can provide the absolute path to the model file.
22
+ String modelName = "base.en";
23
+ try {
24
+ whisper.initContext(modelName);
25
+ whisper.getDefaultJavaParams(WhisperSamplingStrategy.WHISPER_SAMPLING_GREEDY);
26
+ // whisper.getDefaultJavaParams(WhisperSamplingStrategy.WHISPER_SAMPLING_BEAM_SEARCH);
27
+ modelInitialised = true;
28
+ } catch (FileNotFoundException ex) {
29
+ System.out.println("Model " + modelName + " not found");
30
+ }
31
+ }
32
+
33
+ @Test
34
+ void testGetDefaultJavaParams() {
35
+ // When
36
+ whisper.getDefaultJavaParams(WhisperSamplingStrategy.WHISPER_SAMPLING_BEAM_SEARCH);
37
+
38
+ // Then if it doesn't throw we've connected to whisper.cpp
39
+ }
40
+
41
+ @Test
42
+ void testFullTranscribe() throws Exception {
43
+ if (!modelInitialised) {
44
+ System.out.println("Model not initialised, skipping test");
45
+ return;
46
+ }
47
+
48
+ // Given
49
+ File file = new File(System.getProperty("user.dir"), "../../samples/jfk.wav");
50
+ AudioInputStream audioInputStream = AudioSystem.getAudioInputStream(file);
51
+
52
+ byte[] b = new byte[audioInputStream.available()];
53
+ float[] floats = new float[b.length / 2];
54
+
55
+ try {
56
+ audioInputStream.read(b);
57
+
58
+ for (int i = 0, j = 0; i < b.length; i += 2, j++) {
59
+ int intSample = (int) (b[i + 1]) << 8 | (int) (b[i]) & 0xFF;
60
+ floats[j] = intSample / 32767.0f;
61
+ }
62
+
63
+ // When
64
+ String result = whisper.fullTranscribe(/*params,*/ floats);
65
+
66
+ // Then
67
+ System.out.println(result);
68
+ assertEquals("And so my fellow Americans, ask not what your country can do for you, " +
69
+ "ask what you can do for your country.",
70
+ result);
71
+ } finally {
72
+ audioInputStream.close();
73
+ }
74
+ }
75
+ }
bindings/java/src/test/java/io/github/ggerganov/whispercpp/WhisperJnaLibraryTest.java ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ package io.github.ggerganov.whispercpp;
2
+
3
+ import static org.junit.jupiter.api.Assertions.*;
4
+
5
+ import org.junit.jupiter.api.Test;
6
+
7
+ class WhisperJnaLibraryTest {
8
+
9
+ @Test
10
+ void testWhisperPrint_system_info() {
11
+ String systemInfo = WhisperCppJnaLibrary.instance.whisper_print_system_info();
12
+ // eg: "AVX = 1 | AVX2 = 1 | AVX512 = 0 | FMA = 1 | NEON = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0
13
+ // | WASM_SIMD = 0 | BLAS = 0 | SSE3 = 1 | VSX = 0 | COREML = 0 | "
14
+ System.out.println("System info: " + systemInfo);
15
+ assertTrue(systemInfo.length() > 10);
16
+ }
17
+ }