diff --git a/.gitignore b/.gitignore index c1e584dba32..c302c02f083 100644 --- a/.gitignore +++ b/.gitignore @@ -52,6 +52,7 @@ models/*.mlpackage bindings/java/.gradle/ bindings/java/.idea/ .idea/ +models/ggml-base.en.bin benchmark_results.csv cmake-build-debug/ diff --git a/bindings/java/bin/main/io/github/ggerganov/whispercpp/WhisperContext.class b/bindings/java/bin/main/io/github/ggerganov/whispercpp/WhisperContext.class new file mode 100644 index 00000000000..e06b2d25997 Binary files /dev/null and b/bindings/java/bin/main/io/github/ggerganov/whispercpp/WhisperContext.class differ diff --git a/bindings/java/bin/main/io/github/ggerganov/whispercpp/WhisperCpp.class b/bindings/java/bin/main/io/github/ggerganov/whispercpp/WhisperCpp.class new file mode 100644 index 00000000000..eb4e27ad687 Binary files /dev/null and b/bindings/java/bin/main/io/github/ggerganov/whispercpp/WhisperCpp.class differ diff --git a/bindings/java/bin/main/io/github/ggerganov/whispercpp/WhisperCppJnaLibrary.class b/bindings/java/bin/main/io/github/ggerganov/whispercpp/WhisperCppJnaLibrary.class new file mode 100644 index 00000000000..036d81ff0cd Binary files /dev/null and b/bindings/java/bin/main/io/github/ggerganov/whispercpp/WhisperCppJnaLibrary.class differ diff --git a/bindings/java/bin/main/io/github/ggerganov/whispercpp/bean/WhisperSegment.class b/bindings/java/bin/main/io/github/ggerganov/whispercpp/bean/WhisperSegment.class new file mode 100644 index 00000000000..f9e68400837 Binary files /dev/null and b/bindings/java/bin/main/io/github/ggerganov/whispercpp/bean/WhisperSegment.class differ diff --git a/bindings/java/bin/main/io/github/ggerganov/whispercpp/callbacks/WhisperEncoderBeginCallback.class b/bindings/java/bin/main/io/github/ggerganov/whispercpp/callbacks/WhisperEncoderBeginCallback.class new file mode 100644 index 00000000000..903bfdb7b2e Binary files /dev/null and b/bindings/java/bin/main/io/github/ggerganov/whispercpp/callbacks/WhisperEncoderBeginCallback.class differ diff 
--git a/bindings/java/bin/main/io/github/ggerganov/whispercpp/callbacks/WhisperLogitsFilterCallback.class b/bindings/java/bin/main/io/github/ggerganov/whispercpp/callbacks/WhisperLogitsFilterCallback.class new file mode 100644 index 00000000000..5aed877ae50 Binary files /dev/null and b/bindings/java/bin/main/io/github/ggerganov/whispercpp/callbacks/WhisperLogitsFilterCallback.class differ diff --git a/bindings/java/bin/main/io/github/ggerganov/whispercpp/callbacks/WhisperNewSegmentCallback.class b/bindings/java/bin/main/io/github/ggerganov/whispercpp/callbacks/WhisperNewSegmentCallback.class new file mode 100644 index 00000000000..7d7281589a0 Binary files /dev/null and b/bindings/java/bin/main/io/github/ggerganov/whispercpp/callbacks/WhisperNewSegmentCallback.class differ diff --git a/bindings/java/bin/main/io/github/ggerganov/whispercpp/callbacks/WhisperProgressCallback.class b/bindings/java/bin/main/io/github/ggerganov/whispercpp/callbacks/WhisperProgressCallback.class new file mode 100644 index 00000000000..909c4a0af2d Binary files /dev/null and b/bindings/java/bin/main/io/github/ggerganov/whispercpp/callbacks/WhisperProgressCallback.class differ diff --git a/bindings/java/bin/main/io/github/ggerganov/whispercpp/ggml/GgmlTensor.class b/bindings/java/bin/main/io/github/ggerganov/whispercpp/ggml/GgmlTensor.class new file mode 100644 index 00000000000..49b6a882e90 Binary files /dev/null and b/bindings/java/bin/main/io/github/ggerganov/whispercpp/ggml/GgmlTensor.class differ diff --git a/bindings/java/bin/main/io/github/ggerganov/whispercpp/ggml/GgmlType.class b/bindings/java/bin/main/io/github/ggerganov/whispercpp/ggml/GgmlType.class new file mode 100644 index 00000000000..656bad490ef Binary files /dev/null and b/bindings/java/bin/main/io/github/ggerganov/whispercpp/ggml/GgmlType.class differ diff --git a/bindings/java/bin/main/io/github/ggerganov/whispercpp/model/EModel.class b/bindings/java/bin/main/io/github/ggerganov/whispercpp/model/EModel.class new file mode 
100644 index 00000000000..26417156829 Binary files /dev/null and b/bindings/java/bin/main/io/github/ggerganov/whispercpp/model/EModel.class differ diff --git a/bindings/java/bin/main/io/github/ggerganov/whispercpp/model/WhisperModel.class b/bindings/java/bin/main/io/github/ggerganov/whispercpp/model/WhisperModel.class new file mode 100644 index 00000000000..531b86ab243 Binary files /dev/null and b/bindings/java/bin/main/io/github/ggerganov/whispercpp/model/WhisperModel.class differ diff --git a/bindings/java/bin/main/io/github/ggerganov/whispercpp/model/WhisperModelLoader$CloseCallback.class b/bindings/java/bin/main/io/github/ggerganov/whispercpp/model/WhisperModelLoader$CloseCallback.class new file mode 100644 index 00000000000..078c6c16bb2 Binary files /dev/null and b/bindings/java/bin/main/io/github/ggerganov/whispercpp/model/WhisperModelLoader$CloseCallback.class differ diff --git a/bindings/java/bin/main/io/github/ggerganov/whispercpp/model/WhisperModelLoader$CloseFunction.class b/bindings/java/bin/main/io/github/ggerganov/whispercpp/model/WhisperModelLoader$CloseFunction.class new file mode 100644 index 00000000000..2d06ecaa960 Binary files /dev/null and b/bindings/java/bin/main/io/github/ggerganov/whispercpp/model/WhisperModelLoader$CloseFunction.class differ diff --git a/bindings/java/bin/main/io/github/ggerganov/whispercpp/model/WhisperModelLoader$EOFCallback.class b/bindings/java/bin/main/io/github/ggerganov/whispercpp/model/WhisperModelLoader$EOFCallback.class new file mode 100644 index 00000000000..6a64fcb81cc Binary files /dev/null and b/bindings/java/bin/main/io/github/ggerganov/whispercpp/model/WhisperModelLoader$EOFCallback.class differ diff --git a/bindings/java/bin/main/io/github/ggerganov/whispercpp/model/WhisperModelLoader$EOFFunction.class b/bindings/java/bin/main/io/github/ggerganov/whispercpp/model/WhisperModelLoader$EOFFunction.class new file mode 100644 index 00000000000..6be4fdb254b Binary files /dev/null and 
b/bindings/java/bin/main/io/github/ggerganov/whispercpp/model/WhisperModelLoader$EOFFunction.class differ diff --git a/bindings/java/bin/main/io/github/ggerganov/whispercpp/model/WhisperModelLoader$ReadCallback.class b/bindings/java/bin/main/io/github/ggerganov/whispercpp/model/WhisperModelLoader$ReadCallback.class new file mode 100644 index 00000000000..3b52dbf84f1 Binary files /dev/null and b/bindings/java/bin/main/io/github/ggerganov/whispercpp/model/WhisperModelLoader$ReadCallback.class differ diff --git a/bindings/java/bin/main/io/github/ggerganov/whispercpp/model/WhisperModelLoader$ReadFunction.class b/bindings/java/bin/main/io/github/ggerganov/whispercpp/model/WhisperModelLoader$ReadFunction.class new file mode 100644 index 00000000000..d4eeba0d499 Binary files /dev/null and b/bindings/java/bin/main/io/github/ggerganov/whispercpp/model/WhisperModelLoader$ReadFunction.class differ diff --git a/bindings/java/bin/main/io/github/ggerganov/whispercpp/model/WhisperModelLoader.class b/bindings/java/bin/main/io/github/ggerganov/whispercpp/model/WhisperModelLoader.class new file mode 100644 index 00000000000..5dce2593873 Binary files /dev/null and b/bindings/java/bin/main/io/github/ggerganov/whispercpp/model/WhisperModelLoader.class differ diff --git a/bindings/java/bin/main/io/github/ggerganov/whispercpp/model/WhisperState.class b/bindings/java/bin/main/io/github/ggerganov/whispercpp/model/WhisperState.class new file mode 100644 index 00000000000..dc1bf8e436d Binary files /dev/null and b/bindings/java/bin/main/io/github/ggerganov/whispercpp/model/WhisperState.class differ diff --git a/bindings/java/bin/main/io/github/ggerganov/whispercpp/model/WhisperTokenData.class b/bindings/java/bin/main/io/github/ggerganov/whispercpp/model/WhisperTokenData.class new file mode 100644 index 00000000000..de3e2e180d3 Binary files /dev/null and b/bindings/java/bin/main/io/github/ggerganov/whispercpp/model/WhisperTokenData.class differ diff --git 
a/bindings/java/bin/main/io/github/ggerganov/whispercpp/params/BeamSearchParams.class b/bindings/java/bin/main/io/github/ggerganov/whispercpp/params/BeamSearchParams.class new file mode 100644 index 00000000000..7b107779818 Binary files /dev/null and b/bindings/java/bin/main/io/github/ggerganov/whispercpp/params/BeamSearchParams.class differ diff --git a/bindings/java/bin/main/io/github/ggerganov/whispercpp/params/CBool.class b/bindings/java/bin/main/io/github/ggerganov/whispercpp/params/CBool.class new file mode 100644 index 00000000000..373fd01fa66 Binary files /dev/null and b/bindings/java/bin/main/io/github/ggerganov/whispercpp/params/CBool.class differ diff --git a/bindings/java/bin/main/io/github/ggerganov/whispercpp/params/GreedyParams.class b/bindings/java/bin/main/io/github/ggerganov/whispercpp/params/GreedyParams.class new file mode 100644 index 00000000000..fba8fdebf25 Binary files /dev/null and b/bindings/java/bin/main/io/github/ggerganov/whispercpp/params/GreedyParams.class differ diff --git a/bindings/java/bin/main/io/github/ggerganov/whispercpp/params/WhisperContextParams.class b/bindings/java/bin/main/io/github/ggerganov/whispercpp/params/WhisperContextParams.class new file mode 100644 index 00000000000..f05d7c487d3 Binary files /dev/null and b/bindings/java/bin/main/io/github/ggerganov/whispercpp/params/WhisperContextParams.class differ diff --git a/bindings/java/bin/main/io/github/ggerganov/whispercpp/params/WhisperFilters.class b/bindings/java/bin/main/io/github/ggerganov/whispercpp/params/WhisperFilters.class new file mode 100644 index 00000000000..b3c7b41fcd9 Binary files /dev/null and b/bindings/java/bin/main/io/github/ggerganov/whispercpp/params/WhisperFilters.class differ diff --git a/bindings/java/bin/main/io/github/ggerganov/whispercpp/params/WhisperFullParams.class b/bindings/java/bin/main/io/github/ggerganov/whispercpp/params/WhisperFullParams.class new file mode 100644 index 00000000000..3e612b214f4 Binary files /dev/null and 
b/bindings/java/bin/main/io/github/ggerganov/whispercpp/params/WhisperFullParams.class differ diff --git a/bindings/java/bin/main/io/github/ggerganov/whispercpp/params/WhisperHParams.class b/bindings/java/bin/main/io/github/ggerganov/whispercpp/params/WhisperHParams.class new file mode 100644 index 00000000000..c3cedf046e2 Binary files /dev/null and b/bindings/java/bin/main/io/github/ggerganov/whispercpp/params/WhisperHParams.class differ diff --git a/bindings/java/bin/main/io/github/ggerganov/whispercpp/params/WhisperSamplingStrategy.class b/bindings/java/bin/main/io/github/ggerganov/whispercpp/params/WhisperSamplingStrategy.class new file mode 100644 index 00000000000..3bcb4b4d072 Binary files /dev/null and b/bindings/java/bin/main/io/github/ggerganov/whispercpp/params/WhisperSamplingStrategy.class differ diff --git a/bindings/java/bin/test/io/github/ggerganov/whispercpp/WhisperCppTest.class b/bindings/java/bin/test/io/github/ggerganov/whispercpp/WhisperCppTest.class new file mode 100644 index 00000000000..ac0d926639b Binary files /dev/null and b/bindings/java/bin/test/io/github/ggerganov/whispercpp/WhisperCppTest.class differ diff --git a/bindings/java/bin/test/io/github/ggerganov/whispercpp/WhisperJnaLibraryTest.class b/bindings/java/bin/test/io/github/ggerganov/whispercpp/WhisperJnaLibraryTest.class new file mode 100644 index 00000000000..6168e0521a9 Binary files /dev/null and b/bindings/java/bin/test/io/github/ggerganov/whispercpp/WhisperJnaLibraryTest.class differ diff --git a/wer_testing/6097_5_mins/audio/astralplane_03_leadbeater_0639.wav b/wer_testing/6097_5_mins/audio/astralplane_03_leadbeater_0639.wav new file mode 100644 index 00000000000..c69f8134cee Binary files /dev/null and b/wer_testing/6097_5_mins/audio/astralplane_03_leadbeater_0639.wav differ diff --git a/wer_testing/6097_5_mins/audio/astralplane_04_leadbeater_0040.wav b/wer_testing/6097_5_mins/audio/astralplane_04_leadbeater_0040.wav new file mode 100644 index 00000000000..2770dc595aa Binary 
files /dev/null and b/wer_testing/6097_5_mins/audio/astralplane_04_leadbeater_0040.wav differ diff --git a/wer_testing/6097_5_mins/audio/astralplane_06_leadbeater_0349.wav b/wer_testing/6097_5_mins/audio/astralplane_06_leadbeater_0349.wav new file mode 100644 index 00000000000..0d9ee680c90 Binary files /dev/null and b/wer_testing/6097_5_mins/audio/astralplane_06_leadbeater_0349.wav differ diff --git a/wer_testing/6097_5_mins/audio/bambatse_02_haggard_0323.wav b/wer_testing/6097_5_mins/audio/bambatse_02_haggard_0323.wav new file mode 100644 index 00000000000..2a7146302f3 Binary files /dev/null and b/wer_testing/6097_5_mins/audio/bambatse_02_haggard_0323.wav differ diff --git a/wer_testing/6097_5_mins/audio/bambatse_04_haggard_0273.wav b/wer_testing/6097_5_mins/audio/bambatse_04_haggard_0273.wav new file mode 100644 index 00000000000..b142cdd9f27 Binary files /dev/null and b/wer_testing/6097_5_mins/audio/bambatse_04_haggard_0273.wav differ diff --git a/wer_testing/6097_5_mins/audio/bambatse_06_haggard_0090.wav b/wer_testing/6097_5_mins/audio/bambatse_06_haggard_0090.wav new file mode 100644 index 00000000000..3305d4a614a Binary files /dev/null and b/wer_testing/6097_5_mins/audio/bambatse_06_haggard_0090.wav differ diff --git a/wer_testing/6097_5_mins/audio/bambatse_06_haggard_0163.wav b/wer_testing/6097_5_mins/audio/bambatse_06_haggard_0163.wav new file mode 100644 index 00000000000..42618b27cd4 Binary files /dev/null and b/wer_testing/6097_5_mins/audio/bambatse_06_haggard_0163.wav differ diff --git a/wer_testing/6097_5_mins/audio/bambatse_06_haggard_0280.wav b/wer_testing/6097_5_mins/audio/bambatse_06_haggard_0280.wav new file mode 100644 index 00000000000..7dd3f9a9704 Binary files /dev/null and b/wer_testing/6097_5_mins/audio/bambatse_06_haggard_0280.wav differ diff --git a/wer_testing/6097_5_mins/audio/bambatse_07_haggard_0054.wav b/wer_testing/6097_5_mins/audio/bambatse_07_haggard_0054.wav new file mode 100644 index 00000000000..ef33c10fe66 Binary files /dev/null 
and b/wer_testing/6097_5_mins/audio/bambatse_07_haggard_0054.wav differ diff --git a/wer_testing/6097_5_mins/audio/bambatse_07_haggard_0195.wav b/wer_testing/6097_5_mins/audio/bambatse_07_haggard_0195.wav new file mode 100644 index 00000000000..406b8412767 Binary files /dev/null and b/wer_testing/6097_5_mins/audio/bambatse_07_haggard_0195.wav differ diff --git a/wer_testing/6097_5_mins/audio/bambatse_10_haggard_0284.wav b/wer_testing/6097_5_mins/audio/bambatse_10_haggard_0284.wav new file mode 100644 index 00000000000..f9dc07bbbef Binary files /dev/null and b/wer_testing/6097_5_mins/audio/bambatse_10_haggard_0284.wav differ diff --git a/wer_testing/6097_5_mins/audio/bambatse_13_haggard_0083.wav b/wer_testing/6097_5_mins/audio/bambatse_13_haggard_0083.wav new file mode 100644 index 00000000000..1c2cedcfcba Binary files /dev/null and b/wer_testing/6097_5_mins/audio/bambatse_13_haggard_0083.wav differ diff --git a/wer_testing/6097_5_mins/audio/bambatse_15_haggard_0013.wav b/wer_testing/6097_5_mins/audio/bambatse_15_haggard_0013.wav new file mode 100644 index 00000000000..ad4ec94feae Binary files /dev/null and b/wer_testing/6097_5_mins/audio/bambatse_15_haggard_0013.wav differ diff --git a/wer_testing/6097_5_mins/audio/bambatse_15_haggard_0247.wav b/wer_testing/6097_5_mins/audio/bambatse_15_haggard_0247.wav new file mode 100644 index 00000000000..fe819ea2bd8 Binary files /dev/null and b/wer_testing/6097_5_mins/audio/bambatse_15_haggard_0247.wav differ diff --git a/wer_testing/6097_5_mins/audio/bambatse_19_haggard_0301.wav b/wer_testing/6097_5_mins/audio/bambatse_19_haggard_0301.wav new file mode 100644 index 00000000000..b9562774438 Binary files /dev/null and b/wer_testing/6097_5_mins/audio/bambatse_19_haggard_0301.wav differ diff --git a/wer_testing/6097_5_mins/audio/bambatse_20_haggard_0146.wav b/wer_testing/6097_5_mins/audio/bambatse_20_haggard_0146.wav new file mode 100644 index 00000000000..d92bf95c5c7 Binary files /dev/null and 
b/wer_testing/6097_5_mins/audio/bambatse_20_haggard_0146.wav differ diff --git a/wer_testing/6097_5_mins/audio/bambatse_20_haggard_0331.wav b/wer_testing/6097_5_mins/audio/bambatse_20_haggard_0331.wav new file mode 100644 index 00000000000..ade0846540f Binary files /dev/null and b/wer_testing/6097_5_mins/audio/bambatse_20_haggard_0331.wav differ diff --git a/wer_testing/6097_5_mins/audio/bambatse_22_haggard_0394.wav b/wer_testing/6097_5_mins/audio/bambatse_22_haggard_0394.wav new file mode 100644 index 00000000000..a617ae4d2df Binary files /dev/null and b/wer_testing/6097_5_mins/audio/bambatse_22_haggard_0394.wav differ diff --git a/wer_testing/6097_5_mins/audio/glitteringplain_07_morris_0054.wav b/wer_testing/6097_5_mins/audio/glitteringplain_07_morris_0054.wav new file mode 100644 index 00000000000..af3103144bb Binary files /dev/null and b/wer_testing/6097_5_mins/audio/glitteringplain_07_morris_0054.wav differ diff --git a/wer_testing/6097_5_mins/audio/glitteringplain_09_morris_0119.wav b/wer_testing/6097_5_mins/audio/glitteringplain_09_morris_0119.wav new file mode 100644 index 00000000000..1827fcab7de Binary files /dev/null and b/wer_testing/6097_5_mins/audio/glitteringplain_09_morris_0119.wav differ diff --git a/wer_testing/6097_5_mins/audio/glitteringplain_11_morris_0037.wav b/wer_testing/6097_5_mins/audio/glitteringplain_11_morris_0037.wav new file mode 100644 index 00000000000..ec6ad92745a Binary files /dev/null and b/wer_testing/6097_5_mins/audio/glitteringplain_11_morris_0037.wav differ diff --git a/wer_testing/6097_5_mins/audio/glitteringplain_14_morris_0155.wav b/wer_testing/6097_5_mins/audio/glitteringplain_14_morris_0155.wav new file mode 100644 index 00000000000..3272d5d888c Binary files /dev/null and b/wer_testing/6097_5_mins/audio/glitteringplain_14_morris_0155.wav differ diff --git a/wer_testing/6097_5_mins/audio/glitteringplain_15_morris_0108.wav b/wer_testing/6097_5_mins/audio/glitteringplain_15_morris_0108.wav new file mode 100644 index 
00000000000..de114991058 Binary files /dev/null and b/wer_testing/6097_5_mins/audio/glitteringplain_15_morris_0108.wav differ diff --git a/wer_testing/6097_5_mins/audio/glitteringplain_20_morris_0336.wav b/wer_testing/6097_5_mins/audio/glitteringplain_20_morris_0336.wav new file mode 100644 index 00000000000..785ab810fa6 Binary files /dev/null and b/wer_testing/6097_5_mins/audio/glitteringplain_20_morris_0336.wav differ diff --git a/wer_testing/6097_5_mins/audio/hartmann_01_fawcett_0317.wav b/wer_testing/6097_5_mins/audio/hartmann_01_fawcett_0317.wav new file mode 100644 index 00000000000..d6d65e069d4 Binary files /dev/null and b/wer_testing/6097_5_mins/audio/hartmann_01_fawcett_0317.wav differ diff --git a/wer_testing/6097_5_mins/audio/hartmann_04_fawcett_0191.wav b/wer_testing/6097_5_mins/audio/hartmann_04_fawcett_0191.wav new file mode 100644 index 00000000000..4b6c04b6a26 Binary files /dev/null and b/wer_testing/6097_5_mins/audio/hartmann_04_fawcett_0191.wav differ diff --git a/wer_testing/6097_5_mins/audio/hartmann_05_fawcett_0100.wav b/wer_testing/6097_5_mins/audio/hartmann_05_fawcett_0100.wav new file mode 100644 index 00000000000..5181a990470 Binary files /dev/null and b/wer_testing/6097_5_mins/audio/hartmann_05_fawcett_0100.wav differ diff --git a/wer_testing/6097_5_mins/audio/hartmann_08_fawcett_0171.wav b/wer_testing/6097_5_mins/audio/hartmann_08_fawcett_0171.wav new file mode 100644 index 00000000000..9e1565ef8c3 Binary files /dev/null and b/wer_testing/6097_5_mins/audio/hartmann_08_fawcett_0171.wav differ diff --git a/wer_testing/6097_5_mins/audio/hartmann_11_fawcett_0056.wav b/wer_testing/6097_5_mins/audio/hartmann_11_fawcett_0056.wav new file mode 100644 index 00000000000..aca994acb4a Binary files /dev/null and b/wer_testing/6097_5_mins/audio/hartmann_11_fawcett_0056.wav differ diff --git a/wer_testing/6097_5_mins/audio/hartmann_11_fawcett_0337.wav b/wer_testing/6097_5_mins/audio/hartmann_11_fawcett_0337.wav new file mode 100644 index 
00000000000..583dbfd1142 Binary files /dev/null and b/wer_testing/6097_5_mins/audio/hartmann_11_fawcett_0337.wav differ diff --git a/wer_testing/6097_5_mins/audio/hartmann_12_fawcett_0088.wav b/wer_testing/6097_5_mins/audio/hartmann_12_fawcett_0088.wav new file mode 100644 index 00000000000..d3a5b631c05 Binary files /dev/null and b/wer_testing/6097_5_mins/audio/hartmann_12_fawcett_0088.wav differ diff --git a/wer_testing/6097_5_mins/audio/hartmann_12_fawcett_0110.wav b/wer_testing/6097_5_mins/audio/hartmann_12_fawcett_0110.wav new file mode 100644 index 00000000000..aeeb41651ba Binary files /dev/null and b/wer_testing/6097_5_mins/audio/hartmann_12_fawcett_0110.wav differ diff --git a/wer_testing/6097_5_mins/audio/hartmann_12_fawcett_0193.wav b/wer_testing/6097_5_mins/audio/hartmann_12_fawcett_0193.wav new file mode 100644 index 00000000000..fc5a77286b0 Binary files /dev/null and b/wer_testing/6097_5_mins/audio/hartmann_12_fawcett_0193.wav differ diff --git a/wer_testing/6097_5_mins/audio/hartmann_15_fawcett_0148.wav b/wer_testing/6097_5_mins/audio/hartmann_15_fawcett_0148.wav new file mode 100644 index 00000000000..05271751539 Binary files /dev/null and b/wer_testing/6097_5_mins/audio/hartmann_15_fawcett_0148.wav differ diff --git a/wer_testing/6097_5_mins/audio/jaufry_00_lafon_0062.wav b/wer_testing/6097_5_mins/audio/jaufry_00_lafon_0062.wav new file mode 100644 index 00000000000..ced8275eca8 Binary files /dev/null and b/wer_testing/6097_5_mins/audio/jaufry_00_lafon_0062.wav differ diff --git a/wer_testing/6097_5_mins/audio/jaufry_01_lafon_0175.wav b/wer_testing/6097_5_mins/audio/jaufry_01_lafon_0175.wav new file mode 100644 index 00000000000..afbf9ad5db6 Binary files /dev/null and b/wer_testing/6097_5_mins/audio/jaufry_01_lafon_0175.wav differ diff --git a/wer_testing/6097_5_mins/audio/jaufry_02_lafon_0029.wav b/wer_testing/6097_5_mins/audio/jaufry_02_lafon_0029.wav new file mode 100644 index 00000000000..17098652ace Binary files /dev/null and 
b/wer_testing/6097_5_mins/audio/jaufry_02_lafon_0029.wav differ diff --git a/wer_testing/6097_5_mins/audio/jaufry_03_lafon_0298.wav b/wer_testing/6097_5_mins/audio/jaufry_03_lafon_0298.wav new file mode 100644 index 00000000000..511d89d9afc Binary files /dev/null and b/wer_testing/6097_5_mins/audio/jaufry_03_lafon_0298.wav differ diff --git a/wer_testing/6097_5_mins/audio/jaufry_05_lafon_0217.wav b/wer_testing/6097_5_mins/audio/jaufry_05_lafon_0217.wav new file mode 100644 index 00000000000..7a0cec6447f Binary files /dev/null and b/wer_testing/6097_5_mins/audio/jaufry_05_lafon_0217.wav differ diff --git a/wer_testing/6097_5_mins/audio/jaufry_06_lafon_0466.wav b/wer_testing/6097_5_mins/audio/jaufry_06_lafon_0466.wav new file mode 100644 index 00000000000..ab0c297a2f3 Binary files /dev/null and b/wer_testing/6097_5_mins/audio/jaufry_06_lafon_0466.wav differ diff --git a/wer_testing/6097_5_mins/audio/jaufry_07_lafon_0073.wav b/wer_testing/6097_5_mins/audio/jaufry_07_lafon_0073.wav new file mode 100644 index 00000000000..5592f1c4b20 Binary files /dev/null and b/wer_testing/6097_5_mins/audio/jaufry_07_lafon_0073.wav differ diff --git a/wer_testing/6097_5_mins/audio/jaufry_08_lafon_0098.wav b/wer_testing/6097_5_mins/audio/jaufry_08_lafon_0098.wav new file mode 100644 index 00000000000..958711ffd3d Binary files /dev/null and b/wer_testing/6097_5_mins/audio/jaufry_08_lafon_0098.wav differ diff --git a/wer_testing/6097_5_mins/audio/jaufry_09_lafon_0365.wav b/wer_testing/6097_5_mins/audio/jaufry_09_lafon_0365.wav new file mode 100644 index 00000000000..530b63a46c2 Binary files /dev/null and b/wer_testing/6097_5_mins/audio/jaufry_09_lafon_0365.wav differ diff --git a/wer_testing/6097_5_mins/audio/nada_lily_00_haggard_0218.wav b/wer_testing/6097_5_mins/audio/nada_lily_00_haggard_0218.wav new file mode 100644 index 00000000000..ffc60a4bba2 Binary files /dev/null and b/wer_testing/6097_5_mins/audio/nada_lily_00_haggard_0218.wav differ diff --git 
a/wer_testing/6097_5_mins/audio/nada_lily_02_haggard_0216.wav b/wer_testing/6097_5_mins/audio/nada_lily_02_haggard_0216.wav new file mode 100644 index 00000000000..8a7e93e07d2 Binary files /dev/null and b/wer_testing/6097_5_mins/audio/nada_lily_02_haggard_0216.wav differ diff --git a/wer_testing/6097_5_mins/audio/nada_lily_04_haggard_0027.wav b/wer_testing/6097_5_mins/audio/nada_lily_04_haggard_0027.wav new file mode 100644 index 00000000000..d15e18fe5fe Binary files /dev/null and b/wer_testing/6097_5_mins/audio/nada_lily_04_haggard_0027.wav differ diff --git a/wer_testing/6097_5_mins/audio/nada_lily_04_haggard_0190.wav b/wer_testing/6097_5_mins/audio/nada_lily_04_haggard_0190.wav new file mode 100644 index 00000000000..caf794aaf17 Binary files /dev/null and b/wer_testing/6097_5_mins/audio/nada_lily_04_haggard_0190.wav differ diff --git a/wer_testing/6097_5_mins/audio/nada_lily_12_haggard_0130.wav b/wer_testing/6097_5_mins/audio/nada_lily_12_haggard_0130.wav new file mode 100644 index 00000000000..539b227720f Binary files /dev/null and b/wer_testing/6097_5_mins/audio/nada_lily_12_haggard_0130.wav differ diff --git a/wer_testing/6097_5_mins/audio/nada_lily_12_haggard_0523.wav b/wer_testing/6097_5_mins/audio/nada_lily_12_haggard_0523.wav new file mode 100644 index 00000000000..bdbf653ad5e Binary files /dev/null and b/wer_testing/6097_5_mins/audio/nada_lily_12_haggard_0523.wav differ diff --git a/wer_testing/6097_5_mins/audio/nada_lily_13_haggard_0035.wav b/wer_testing/6097_5_mins/audio/nada_lily_13_haggard_0035.wav new file mode 100644 index 00000000000..9123b10b779 Binary files /dev/null and b/wer_testing/6097_5_mins/audio/nada_lily_13_haggard_0035.wav differ diff --git a/wer_testing/6097_5_mins/audio/nada_lily_15_haggard_0182.wav b/wer_testing/6097_5_mins/audio/nada_lily_15_haggard_0182.wav new file mode 100644 index 00000000000..e886515e5a4 Binary files /dev/null and b/wer_testing/6097_5_mins/audio/nada_lily_15_haggard_0182.wav differ diff --git 
a/wer_testing/6097_5_mins/audio/nada_lily_15_haggard_0288.wav b/wer_testing/6097_5_mins/audio/nada_lily_15_haggard_0288.wav new file mode 100644 index 00000000000..89099543140 Binary files /dev/null and b/wer_testing/6097_5_mins/audio/nada_lily_15_haggard_0288.wav differ diff --git a/wer_testing/6097_5_mins/audio/nada_lily_17_haggard_0378.wav b/wer_testing/6097_5_mins/audio/nada_lily_17_haggard_0378.wav new file mode 100644 index 00000000000..72b4748dd09 Binary files /dev/null and b/wer_testing/6097_5_mins/audio/nada_lily_17_haggard_0378.wav differ diff --git a/wer_testing/6097_5_mins/audio/nada_lily_18_haggard_0386.wav b/wer_testing/6097_5_mins/audio/nada_lily_18_haggard_0386.wav new file mode 100644 index 00000000000..d1477ac18d2 Binary files /dev/null and b/wer_testing/6097_5_mins/audio/nada_lily_18_haggard_0386.wav differ diff --git a/wer_testing/6097_5_mins/audio/nada_lily_20_haggard_0145.wav b/wer_testing/6097_5_mins/audio/nada_lily_20_haggard_0145.wav new file mode 100644 index 00000000000..d9658784521 Binary files /dev/null and b/wer_testing/6097_5_mins/audio/nada_lily_20_haggard_0145.wav differ diff --git a/wer_testing/6097_5_mins/audio/nada_lily_21_haggard_0169.wav b/wer_testing/6097_5_mins/audio/nada_lily_21_haggard_0169.wav new file mode 100644 index 00000000000..2296b6dbdbd Binary files /dev/null and b/wer_testing/6097_5_mins/audio/nada_lily_21_haggard_0169.wav differ diff --git a/wer_testing/6097_5_mins/audio/nada_lily_21_haggard_0316.wav b/wer_testing/6097_5_mins/audio/nada_lily_21_haggard_0316.wav new file mode 100644 index 00000000000..609018fb22c Binary files /dev/null and b/wer_testing/6097_5_mins/audio/nada_lily_21_haggard_0316.wav differ diff --git a/wer_testing/6097_5_mins/audio/nada_lily_25_haggard_0351.wav b/wer_testing/6097_5_mins/audio/nada_lily_25_haggard_0351.wav new file mode 100644 index 00000000000..8c7c978a233 Binary files /dev/null and b/wer_testing/6097_5_mins/audio/nada_lily_25_haggard_0351.wav differ diff --git 
a/wer_testing/6097_5_mins/audio/nada_lily_26_haggard_0157.wav b/wer_testing/6097_5_mins/audio/nada_lily_26_haggard_0157.wav new file mode 100644 index 00000000000..6a4ac2d0817 Binary files /dev/null and b/wer_testing/6097_5_mins/audio/nada_lily_26_haggard_0157.wav differ diff --git a/wer_testing/6097_5_mins/audio/nada_lily_27_haggard_0177.wav b/wer_testing/6097_5_mins/audio/nada_lily_27_haggard_0177.wav new file mode 100644 index 00000000000..f1ff8e68a6b Binary files /dev/null and b/wer_testing/6097_5_mins/audio/nada_lily_27_haggard_0177.wav differ diff --git a/wer_testing/6097_5_mins/audio/nada_lily_30_haggard_0086.wav b/wer_testing/6097_5_mins/audio/nada_lily_30_haggard_0086.wav new file mode 100644 index 00000000000..d57d9bc65af Binary files /dev/null and b/wer_testing/6097_5_mins/audio/nada_lily_30_haggard_0086.wav differ diff --git a/wer_testing/6097_5_mins/audio/nada_lily_30_haggard_0368.wav b/wer_testing/6097_5_mins/audio/nada_lily_30_haggard_0368.wav new file mode 100644 index 00000000000..dcd9c54df72 Binary files /dev/null and b/wer_testing/6097_5_mins/audio/nada_lily_30_haggard_0368.wav differ diff --git a/wer_testing/6097_5_mins/audio/nada_lily_32_haggard_0034.wav b/wer_testing/6097_5_mins/audio/nada_lily_32_haggard_0034.wav new file mode 100644 index 00000000000..04a4fee7a1b Binary files /dev/null and b/wer_testing/6097_5_mins/audio/nada_lily_32_haggard_0034.wav differ diff --git a/wer_testing/6097_5_mins/audio/presentpictureofnsw_01_mann_0076.wav b/wer_testing/6097_5_mins/audio/presentpictureofnsw_01_mann_0076.wav new file mode 100644 index 00000000000..5e81de9dc5c Binary files /dev/null and b/wer_testing/6097_5_mins/audio/presentpictureofnsw_01_mann_0076.wav differ diff --git a/wer_testing/6097_5_mins/audio/presentpictureofnsw_01_mann_0160.wav b/wer_testing/6097_5_mins/audio/presentpictureofnsw_01_mann_0160.wav new file mode 100644 index 00000000000..64a5c197074 Binary files /dev/null and 
b/wer_testing/6097_5_mins/audio/presentpictureofnsw_01_mann_0160.wav differ diff --git a/wer_testing/6097_5_mins/audio/presentpictureofnsw_02_mann_0083.wav b/wer_testing/6097_5_mins/audio/presentpictureofnsw_02_mann_0083.wav new file mode 100644 index 00000000000..bded543cef2 Binary files /dev/null and b/wer_testing/6097_5_mins/audio/presentpictureofnsw_02_mann_0083.wav differ diff --git a/wer_testing/6097_5_mins/audio/presentpictureofnsw_02_mann_0532.wav b/wer_testing/6097_5_mins/audio/presentpictureofnsw_02_mann_0532.wav new file mode 100644 index 00000000000..b2f7a0f0f83 Binary files /dev/null and b/wer_testing/6097_5_mins/audio/presentpictureofnsw_02_mann_0532.wav differ diff --git a/wer_testing/6097_5_mins/audio/presentpictureofnsw_02_mann_0962.wav b/wer_testing/6097_5_mins/audio/presentpictureofnsw_02_mann_0962.wav new file mode 100644 index 00000000000..4c9198ef4c9 Binary files /dev/null and b/wer_testing/6097_5_mins/audio/presentpictureofnsw_02_mann_0962.wav differ diff --git a/wer_testing/6097_5_mins/audio/presentpictureofnsw_02_mann_1095.wav b/wer_testing/6097_5_mins/audio/presentpictureofnsw_02_mann_1095.wav new file mode 100644 index 00000000000..2158dbf9b11 Binary files /dev/null and b/wer_testing/6097_5_mins/audio/presentpictureofnsw_02_mann_1095.wav differ diff --git a/wer_testing/6097_5_mins/audio/presentpictureofnsw_04_mann_0197.wav b/wer_testing/6097_5_mins/audio/presentpictureofnsw_04_mann_0197.wav new file mode 100644 index 00000000000..d9bc3f12870 Binary files /dev/null and b/wer_testing/6097_5_mins/audio/presentpictureofnsw_04_mann_0197.wav differ diff --git a/wer_testing/6097_5_mins/audio/presentpictureofnsw_04_mann_0473.wav b/wer_testing/6097_5_mins/audio/presentpictureofnsw_04_mann_0473.wav new file mode 100644 index 00000000000..1b26640890d Binary files /dev/null and b/wer_testing/6097_5_mins/audio/presentpictureofnsw_04_mann_0473.wav differ diff --git a/wer_testing/6097_5_mins/audio/presentpictureofnsw_05_mann_0592.wav 
b/wer_testing/6097_5_mins/audio/presentpictureofnsw_05_mann_0592.wav new file mode 100644 index 00000000000..85b747a9015 Binary files /dev/null and b/wer_testing/6097_5_mins/audio/presentpictureofnsw_05_mann_0592.wav differ diff --git a/wer_testing/6097_5_mins/audio/roots_02_morris_0219.wav b/wer_testing/6097_5_mins/audio/roots_02_morris_0219.wav new file mode 100644 index 00000000000..8a0f92ee5f3 Binary files /dev/null and b/wer_testing/6097_5_mins/audio/roots_02_morris_0219.wav differ diff --git a/wer_testing/6097_5_mins/audio/roots_04_morris_0057.wav b/wer_testing/6097_5_mins/audio/roots_04_morris_0057.wav new file mode 100644 index 00000000000..387b713e650 Binary files /dev/null and b/wer_testing/6097_5_mins/audio/roots_04_morris_0057.wav differ diff --git a/wer_testing/6097_5_mins/audio/roots_04_morris_0400.wav b/wer_testing/6097_5_mins/audio/roots_04_morris_0400.wav new file mode 100644 index 00000000000..266f9b9eb7a Binary files /dev/null and b/wer_testing/6097_5_mins/audio/roots_04_morris_0400.wav differ diff --git a/wer_testing/6097_5_mins/audio/roots_06_morris_0244.wav b/wer_testing/6097_5_mins/audio/roots_06_morris_0244.wav new file mode 100644 index 00000000000..9431b8109e2 Binary files /dev/null and b/wer_testing/6097_5_mins/audio/roots_06_morris_0244.wav differ diff --git a/wer_testing/6097_5_mins/audio/roots_09_morris_0034.wav b/wer_testing/6097_5_mins/audio/roots_09_morris_0034.wav new file mode 100644 index 00000000000..61cdef232a6 Binary files /dev/null and b/wer_testing/6097_5_mins/audio/roots_09_morris_0034.wav differ diff --git a/wer_testing/6097_5_mins/audio/roots_09_morris_0085.wav b/wer_testing/6097_5_mins/audio/roots_09_morris_0085.wav new file mode 100644 index 00000000000..5144471f6b8 Binary files /dev/null and b/wer_testing/6097_5_mins/audio/roots_09_morris_0085.wav differ diff --git a/wer_testing/6097_5_mins/audio/roots_17_morris_0032.wav b/wer_testing/6097_5_mins/audio/roots_17_morris_0032.wav new file mode 100644 index 
00000000000..f306dcede6f Binary files /dev/null and b/wer_testing/6097_5_mins/audio/roots_17_morris_0032.wav differ diff --git a/wer_testing/6097_5_mins/audio/roots_19_morris_0120.wav b/wer_testing/6097_5_mins/audio/roots_19_morris_0120.wav new file mode 100644 index 00000000000..a61b607a064 Binary files /dev/null and b/wer_testing/6097_5_mins/audio/roots_19_morris_0120.wav differ diff --git a/wer_testing/6097_5_mins/audio/roots_19_morris_0269.wav b/wer_testing/6097_5_mins/audio/roots_19_morris_0269.wav new file mode 100644 index 00000000000..cc582e14c52 Binary files /dev/null and b/wer_testing/6097_5_mins/audio/roots_19_morris_0269.wav differ diff --git a/wer_testing/6097_5_mins/audio/roots_19_morris_0392.wav b/wer_testing/6097_5_mins/audio/roots_19_morris_0392.wav new file mode 100644 index 00000000000..da514501688 Binary files /dev/null and b/wer_testing/6097_5_mins/audio/roots_19_morris_0392.wav differ diff --git a/wer_testing/6097_5_mins/audio/roots_27_morris_0352.wav b/wer_testing/6097_5_mins/audio/roots_27_morris_0352.wav new file mode 100644 index 00000000000..fb8ff73e222 Binary files /dev/null and b/wer_testing/6097_5_mins/audio/roots_27_morris_0352.wav differ diff --git a/wer_testing/6097_5_mins/audio/roots_28_morris_0362.wav b/wer_testing/6097_5_mins/audio/roots_28_morris_0362.wav new file mode 100644 index 00000000000..3ffe3f20e16 Binary files /dev/null and b/wer_testing/6097_5_mins/audio/roots_28_morris_0362.wav differ diff --git a/wer_testing/6097_5_mins/audio/roots_32_morris_0506.wav b/wer_testing/6097_5_mins/audio/roots_32_morris_0506.wav new file mode 100644 index 00000000000..e45e627a386 Binary files /dev/null and b/wer_testing/6097_5_mins/audio/roots_32_morris_0506.wav differ diff --git a/wer_testing/6097_5_mins/audio/roots_36_morris_0138.wav b/wer_testing/6097_5_mins/audio/roots_36_morris_0138.wav new file mode 100644 index 00000000000..afc7dc5db50 Binary files /dev/null and b/wer_testing/6097_5_mins/audio/roots_36_morris_0138.wav differ diff 
--git a/wer_testing/6097_5_mins/audio/roots_36_morris_0278.wav b/wer_testing/6097_5_mins/audio/roots_36_morris_0278.wav new file mode 100644 index 00000000000..f3f7543b780 Binary files /dev/null and b/wer_testing/6097_5_mins/audio/roots_36_morris_0278.wav differ diff --git a/wer_testing/6097_5_mins/audio/roots_38_morris_0130.wav b/wer_testing/6097_5_mins/audio/roots_38_morris_0130.wav new file mode 100644 index 00000000000..142ff02036a Binary files /dev/null and b/wer_testing/6097_5_mins/audio/roots_38_morris_0130.wav differ diff --git a/wer_testing/6097_5_mins/audio/roots_39_morris_0340.wav b/wer_testing/6097_5_mins/audio/roots_39_morris_0340.wav new file mode 100644 index 00000000000..dd5dc0d71aa Binary files /dev/null and b/wer_testing/6097_5_mins/audio/roots_39_morris_0340.wav differ diff --git a/wer_testing/6097_5_mins/audio/roots_40_morris_0140.wav b/wer_testing/6097_5_mins/audio/roots_40_morris_0140.wav new file mode 100644 index 00000000000..3842c2df8f9 Binary files /dev/null and b/wer_testing/6097_5_mins/audio/roots_40_morris_0140.wav differ diff --git a/wer_testing/6097_5_mins/audio/roots_40_morris_0157.wav b/wer_testing/6097_5_mins/audio/roots_40_morris_0157.wav new file mode 100644 index 00000000000..7bcb613e305 Binary files /dev/null and b/wer_testing/6097_5_mins/audio/roots_40_morris_0157.wav differ diff --git a/wer_testing/6097_5_mins/audio/roots_40_morris_0200.wav b/wer_testing/6097_5_mins/audio/roots_40_morris_0200.wav new file mode 100644 index 00000000000..94c6026d621 Binary files /dev/null and b/wer_testing/6097_5_mins/audio/roots_40_morris_0200.wav differ diff --git a/wer_testing/6097_5_mins/audio/roots_44_morris_0112.wav b/wer_testing/6097_5_mins/audio/roots_44_morris_0112.wav new file mode 100644 index 00000000000..de1cf34d994 Binary files /dev/null and b/wer_testing/6097_5_mins/audio/roots_44_morris_0112.wav differ diff --git a/wer_testing/6097_5_mins/audio/roots_44_morris_0140.wav b/wer_testing/6097_5_mins/audio/roots_44_morris_0140.wav new 
file mode 100644 index 00000000000..46795e30a5f Binary files /dev/null and b/wer_testing/6097_5_mins/audio/roots_44_morris_0140.wav differ diff --git a/wer_testing/6097_5_mins/audio/roots_44_morris_0181.wav b/wer_testing/6097_5_mins/audio/roots_44_morris_0181.wav new file mode 100644 index 00000000000..49fc437554c Binary files /dev/null and b/wer_testing/6097_5_mins/audio/roots_44_morris_0181.wav differ diff --git a/wer_testing/6097_5_mins/audio/roots_50_morris_0055.wav b/wer_testing/6097_5_mins/audio/roots_50_morris_0055.wav new file mode 100644 index 00000000000..0f6563a3e4f Binary files /dev/null and b/wer_testing/6097_5_mins/audio/roots_50_morris_0055.wav differ diff --git a/wer_testing/6097_5_mins/audio/roots_50_morris_0154.wav b/wer_testing/6097_5_mins/audio/roots_50_morris_0154.wav new file mode 100644 index 00000000000..b288d229304 Binary files /dev/null and b/wer_testing/6097_5_mins/audio/roots_50_morris_0154.wav differ diff --git a/wer_testing/6097_5_mins/audio/shortstories_01_gaskell_0158.wav b/wer_testing/6097_5_mins/audio/shortstories_01_gaskell_0158.wav new file mode 100644 index 00000000000..e16e79c4cc4 Binary files /dev/null and b/wer_testing/6097_5_mins/audio/shortstories_01_gaskell_0158.wav differ diff --git a/wer_testing/6097_5_mins/audio/shortstories_05_gaskell_0338.wav b/wer_testing/6097_5_mins/audio/shortstories_05_gaskell_0338.wav new file mode 100644 index 00000000000..4d28d7aea8a Binary files /dev/null and b/wer_testing/6097_5_mins/audio/shortstories_05_gaskell_0338.wav differ diff --git a/wer_testing/6097_5_mins/audio/shortstories_05_gaskell_0539.wav b/wer_testing/6097_5_mins/audio/shortstories_05_gaskell_0539.wav new file mode 100644 index 00000000000..52b3bdf7852 Binary files /dev/null and b/wer_testing/6097_5_mins/audio/shortstories_05_gaskell_0539.wav differ diff --git a/wer_testing/6097_5_mins/audio/shortstories_06_gaskell_0340.wav b/wer_testing/6097_5_mins/audio/shortstories_06_gaskell_0340.wav new file mode 100644 index 
00000000000..3fb77f5213d Binary files /dev/null and b/wer_testing/6097_5_mins/audio/shortstories_06_gaskell_0340.wav differ diff --git a/wer_testing/6097_5_mins/audio/shortstories_06_gaskell_0370.wav b/wer_testing/6097_5_mins/audio/shortstories_06_gaskell_0370.wav new file mode 100644 index 00000000000..4fb8c689523 Binary files /dev/null and b/wer_testing/6097_5_mins/audio/shortstories_06_gaskell_0370.wav differ diff --git a/wer_testing/6097_5_mins/audio/shortstories_06_gaskell_0676.wav b/wer_testing/6097_5_mins/audio/shortstories_06_gaskell_0676.wav new file mode 100644 index 00000000000..db876bd1f94 Binary files /dev/null and b/wer_testing/6097_5_mins/audio/shortstories_06_gaskell_0676.wav differ diff --git a/wer_testing/6097_5_mins/audio/shortstories_06_gaskell_0776.wav b/wer_testing/6097_5_mins/audio/shortstories_06_gaskell_0776.wav new file mode 100644 index 00000000000..477ebd9149c Binary files /dev/null and b/wer_testing/6097_5_mins/audio/shortstories_06_gaskell_0776.wav differ diff --git a/wer_testing/6097_5_mins/audio/shortstories_06_gaskell_0784.wav b/wer_testing/6097_5_mins/audio/shortstories_06_gaskell_0784.wav new file mode 100644 index 00000000000..474429d4207 Binary files /dev/null and b/wer_testing/6097_5_mins/audio/shortstories_06_gaskell_0784.wav differ diff --git a/wer_testing/6097_5_mins/audio/shortstories_06_gaskell_0920.wav b/wer_testing/6097_5_mins/audio/shortstories_06_gaskell_0920.wav new file mode 100644 index 00000000000..871c4982c15 Binary files /dev/null and b/wer_testing/6097_5_mins/audio/shortstories_06_gaskell_0920.wav differ diff --git a/wer_testing/6097_5_mins/audio/shortstories_10_gaskell_0142.wav b/wer_testing/6097_5_mins/audio/shortstories_10_gaskell_0142.wav new file mode 100644 index 00000000000..1faa6181a16 Binary files /dev/null and b/wer_testing/6097_5_mins/audio/shortstories_10_gaskell_0142.wav differ diff --git a/wer_testing/6097_5_mins/audio/shortstories_10_gaskell_0630.wav 
b/wer_testing/6097_5_mins/audio/shortstories_10_gaskell_0630.wav new file mode 100644 index 00000000000..d13d3b9ac55 Binary files /dev/null and b/wer_testing/6097_5_mins/audio/shortstories_10_gaskell_0630.wav differ diff --git a/wer_testing/6097_5_mins/audio/swag_03_tompkins_0380.wav b/wer_testing/6097_5_mins/audio/swag_03_tompkins_0380.wav new file mode 100644 index 00000000000..2171bf33863 Binary files /dev/null and b/wer_testing/6097_5_mins/audio/swag_03_tompkins_0380.wav differ diff --git a/wer_testing/6097_5_mins/audio/swag_04_tompkins_0094.wav b/wer_testing/6097_5_mins/audio/swag_04_tompkins_0094.wav new file mode 100644 index 00000000000..fba01ab3674 Binary files /dev/null and b/wer_testing/6097_5_mins/audio/swag_04_tompkins_0094.wav differ diff --git a/wer_testing/6097_5_mins/audio/swag_04_tompkins_0193.wav b/wer_testing/6097_5_mins/audio/swag_04_tompkins_0193.wav new file mode 100644 index 00000000000..be8ab0b4cd7 Binary files /dev/null and b/wer_testing/6097_5_mins/audio/swag_04_tompkins_0193.wav differ diff --git a/wer_testing/6097_5_mins/audio/swag_04_tompkins_0828.wav b/wer_testing/6097_5_mins/audio/swag_04_tompkins_0828.wav new file mode 100644 index 00000000000..cbf19459489 Binary files /dev/null and b/wer_testing/6097_5_mins/audio/swag_04_tompkins_0828.wav differ diff --git a/wer_testing/6097_5_mins/audio/swag_05_tompkins_0039.wav b/wer_testing/6097_5_mins/audio/swag_05_tompkins_0039.wav new file mode 100644 index 00000000000..3c537dd3bbc Binary files /dev/null and b/wer_testing/6097_5_mins/audio/swag_05_tompkins_0039.wav differ diff --git a/wer_testing/6097_5_mins/audio/swag_05_tompkins_0390.wav b/wer_testing/6097_5_mins/audio/swag_05_tompkins_0390.wav new file mode 100644 index 00000000000..61fc5f479e9 Binary files /dev/null and b/wer_testing/6097_5_mins/audio/swag_05_tompkins_0390.wav differ diff --git a/wer_testing/6097_5_mins/audio/swag_06_tompkins_0082.wav b/wer_testing/6097_5_mins/audio/swag_06_tompkins_0082.wav new file mode 100644 index 
00000000000..a6d65e00b98 Binary files /dev/null and b/wer_testing/6097_5_mins/audio/swag_06_tompkins_0082.wav differ diff --git a/wer_testing/6097_5_mins/audio/swag_06_tompkins_0883.wav b/wer_testing/6097_5_mins/audio/swag_06_tompkins_0883.wav new file mode 100644 index 00000000000..73a43139a6e Binary files /dev/null and b/wer_testing/6097_5_mins/audio/swag_06_tompkins_0883.wav differ diff --git a/wer_testing/6097_5_mins/manifest.json b/wer_testing/6097_5_mins/manifest.json new file mode 100644 index 00000000000..37cfc9c4a71 --- /dev/null +++ b/wer_testing/6097_5_mins/manifest.json @@ -0,0 +1,116 @@ +{"audio_filepath": "audio/presentpictureofnsw_02_mann_0532.wav", "text": "not to stop more than ten minutes by the way", "duration": 2.6, "text_no_preprocessing": "not to stop more than ten minutes by the way,", "text_normalized": "not to stop more than ten minutes by the way,"} +{"audio_filepath": "audio/roots_19_morris_0269.wav", "text": "they were men having no country to go back to", "duration": 2.68, "text_no_preprocessing": "they were men having no country to go back to,", "text_normalized": "they were men having no country to go back to,"} +{"audio_filepath": "audio/swag_06_tompkins_0883.wav", "text": "no mistake can well be made", "duration": 1.88, "text_no_preprocessing": "no mistake can well be made.", "text_normalized": "no mistake can well be made."} +{"audio_filepath": "audio/glitteringplain_15_morris_0108.wav", "text": "if thou needs must depart", "duration": 1.88, "text_no_preprocessing": "if thou needs must depart.", "text_normalized": "if thou needs must depart."} +{"audio_filepath": "audio/roots_19_morris_0392.wav", "text": "and there i grew up when i was little", "duration": 2.13, "text_no_preprocessing": "and there I grew up when I was little,", "text_normalized": "and there I grew up when I was little,"} +{"audio_filepath": "audio/roots_44_morris_0181.wav", "text": "these made no tarrying on that space of the dead", "duration": 2.88, 
"text_no_preprocessing": "These made no tarrying on that space of the dead,", "text_normalized": "These made no tarrying on that space of the dead,"} +{"audio_filepath": "audio/shortstories_06_gaskell_0370.wav", "text": "and see how many wild flowers you can bring me home", "duration": 2.92, "text_no_preprocessing": "and see how many wild flowers you can bring me home,", "text_normalized": "and see how many wild flowers you can bring me home,"} +{"audio_filepath": "audio/nada_lily_21_haggard_0316.wav", "text": "awake ye kings", "duration": 1.3, "text_no_preprocessing": "\u201cAwake, ye kings,\u201d", "text_normalized": "\"Awake, ye kings,\""} +{"audio_filepath": "audio/presentpictureofnsw_02_mann_0083.wav", "text": "and all boats to be moored within the hospital wharf and hulk", "duration": 3.46, "text_no_preprocessing": "and all boats to be moored within the Hospital wharf, and hulk.", "text_normalized": "and all boats to be moored within the Hospital wharf, and hulk."} +{"audio_filepath": "audio/jaufry_03_lafon_0298.wav", "text": "said the king", "duration": 0.94, "text_no_preprocessing": "said the king,", "text_normalized": "said the king,"} +{"audio_filepath": "audio/roots_40_morris_0200.wav", "text": "and good it is that friends should talk together on the eve of battle", "duration": 3.47, "text_no_preprocessing": "and good it is that friends should talk together on the eve of battle,", "text_normalized": "and good it is that friends should talk together on the eve of battle,"} +{"audio_filepath": "audio/swag_04_tompkins_0828.wav", "text": "chief justice", "duration": 1.05, "text_no_preprocessing": "Chief Justice,", "text_normalized": "Chief Justice,"} +{"audio_filepath": "audio/shortstories_06_gaskell_0676.wav", "text": "and cover himself as well as he could with his clothes", "duration": 2.94, "text_no_preprocessing": "and cover himself as well as he could with his clothes.", "text_normalized": "and cover himself as well as he could with his clothes."} 
+{"audio_filepath": "audio/nada_lily_20_haggard_0145.wav", "text": "that there before it stood chaka", "duration": 2.44, "text_no_preprocessing": "that there before it stood Chaka,", "text_normalized": "that there before it stood Chaka,"} +{"audio_filepath": "audio/roots_40_morris_0140.wav", "text": "as folk who were grown exceeding dear to them", "duration": 2.86, "text_no_preprocessing": "as folk who were grown exceeding dear to them;", "text_normalized": "as folk who were grown exceeding dear to them;"} +{"audio_filepath": "audio/presentpictureofnsw_01_mann_0160.wav", "text": "governor phillip sailed to england on the eleventh of december", "duration": 3.2, "text_no_preprocessing": "Governor Phillip sailed to England on the 11th of December,", "text_normalized": "Governor Phillip sailed to England on the eleventh of December,"} +{"audio_filepath": "audio/bambatse_13_haggard_0083.wav", "text": "i take no credit for reading that upon your lips", "duration": 2.9, "text_no_preprocessing": "I take no credit for reading that upon your lips;", "text_normalized": "I take no credit for reading that upon your lips;"} +{"audio_filepath": "audio/nada_lily_00_haggard_0218.wav", "text": "formerly treasurer to the government of natal", "duration": 2.51, "text_no_preprocessing": "formerly treasurer to the Government of Natal,", "text_normalized": "formerly treasurer to the Government of Natal,"} +{"audio_filepath": "audio/bambatse_15_haggard_0247.wav", "text": "well", "duration": 0.51, "text_no_preprocessing": "Well,", "text_normalized": "Well,"} +{"audio_filepath": "audio/roots_17_morris_0032.wav", "text": "he went up to it and handled it", "duration": 1.76, "text_no_preprocessing": "He went up to it and handled it,", "text_normalized": "He went up to it and handled it,"} +{"audio_filepath": "audio/roots_38_morris_0130.wav", "text": "and would not forbear", "duration": 1.2, "text_no_preprocessing": "and would not forbear,", "text_normalized": "and would not forbear,"} 
+{"audio_filepath": "audio/jaufry_00_lafon_0062.wav", "text": "blossomed in full freshness like a rose in spring", "duration": 3.08, "text_no_preprocessing": "blossomed in full freshness like a rose in spring.", "text_normalized": "blossomed in full freshness like a rose in spring."} +{"audio_filepath": "audio/roots_09_morris_0034.wav", "text": "and i shall be a young man yet when it comes", "duration": 2.58, "text_no_preprocessing": "and I shall be a young man yet when it comes.", "text_normalized": "and I shall be a young man yet when it comes."} +{"audio_filepath": "audio/nada_lily_32_haggard_0034.wav", "text": "does the lily then live as the soldier thought", "duration": 3.4, "text_no_preprocessing": "\u201cdoes the Lily, then, live as the soldier thought?\u201d", "text_normalized": "\"does the Lily, then, live as the soldier thought?\""} +{"audio_filepath": "audio/nada_lily_27_haggard_0177.wav", "text": "answered the prayer doctor", "duration": 1.76, "text_no_preprocessing": "answered the prayer-doctor,", "text_normalized": "answered the prayer-doctor,"} +{"audio_filepath": "audio/nada_lily_13_haggard_0035.wav", "text": "where the bones of the boy should be", "duration": 2.12, "text_no_preprocessing": "where the bones of the boy should be.", "text_normalized": "where the bones of the boy should be."} +{"audio_filepath": "audio/presentpictureofnsw_02_mann_1095.wav", "text": "in a great measure", "duration": 1.02, "text_no_preprocessing": "in a great measure,", "text_normalized": "in a great measure,"} +{"audio_filepath": "audio/bambatse_20_haggard_0331.wav", "text": "then", "duration": 0.44, "text_no_preprocessing": "then,", "text_normalized": "then,"} +{"audio_filepath": "audio/jaufry_02_lafon_0029.wav", "text": "putting his horse into an ambling pace", "duration": 2.54, "text_no_preprocessing": "Putting his horse into an ambling pace,", "text_normalized": "Putting his horse into an ambling pace,"} +{"audio_filepath": "audio/hartmann_15_fawcett_0148.wav", 
"text": "to devise some new and less costly mode of attack", "duration": 3.3, "text_no_preprocessing": "to devise some new and less costly mode of attack.", "text_normalized": "to devise some new and less costly mode of attack."} +{"audio_filepath": "audio/roots_04_morris_0057.wav", "text": "and shining in the sun as brightly as the alderman's cunningest work of golden wire", "duration": 5.66, "text_no_preprocessing": "and shining in the sun as brightly as the Alderman's cunningest work of golden wire.", "text_normalized": "and shining in the sun as brightly as the Alderman's cunningest work of golden wire."} +{"audio_filepath": "audio/swag_04_tompkins_0094.wav", "text": "for the sake of the view", "duration": 1.39, "text_no_preprocessing": "for the sake of the view,", "text_normalized": "for the sake of the view,"} +{"audio_filepath": "audio/nada_lily_12_haggard_0130.wav", "text": "and the end of it was that he killed the chief of the tribe and took his place", "duration": 3.92, "text_no_preprocessing": "and the end of it was that he killed the chief of the tribe and took his place.", "text_normalized": "and the end of it was that he killed the chief of the tribe and took his place."} +{"audio_filepath": "audio/shortstories_01_gaskell_0158.wav", "text": "her little sisters knelt with her in prayer", "duration": 2.6, "text_no_preprocessing": "Her little sisters knelt with her in prayer,", "text_normalized": "Her little sisters knelt with her in prayer,"} +{"audio_filepath": "audio/hartmann_01_fawcett_0317.wav", "text": "failing further news", "duration": 1.3, "text_no_preprocessing": "failing further news,", "text_normalized": "failing further news,"} +{"audio_filepath": "audio/glitteringplain_09_morris_0119.wav", "text": "and cried out in his shrill voice", "duration": 2.32, "text_no_preprocessing": "and cried out in his shrill voice:", "text_normalized": "and cried out in his shrill voice:"} +{"audio_filepath": "audio/bambatse_02_haggard_0323.wav", "text": 
"knowing that there was time to spare", "duration": 2.06, "text_no_preprocessing": "knowing that there was time to spare.", "text_normalized": "knowing that there was time to spare."} +{"audio_filepath": "audio/shortstories_10_gaskell_0630.wav", "text": "and being blamed by the dear teacher", "duration": 2.36, "text_no_preprocessing": "and being blamed by the dear teacher.", "text_normalized": "and being blamed by the dear teacher."} +{"audio_filepath": "audio/swag_05_tompkins_0039.wav", "text": "and is reached towards evening", "duration": 1.92, "text_no_preprocessing": "and is reached towards evening.", "text_normalized": "and is reached towards evening."} +{"audio_filepath": "audio/bambatse_10_haggard_0284.wav", "text": "so in my dreams have i seen and heard her also", "duration": 3.68, "text_no_preprocessing": "So in my dreams have I seen and heard her also,", "text_normalized": "So in my dreams have I seen and heard her also,"} +{"audio_filepath": "audio/nada_lily_21_haggard_0169.wav", "text": "foam flew from his lips", "duration": 1.92, "text_no_preprocessing": "foam flew from his lips,", "text_normalized": "foam flew from his lips,"} +{"audio_filepath": "audio/shortstories_06_gaskell_0340.wav", "text": "and to see so many people", "duration": 1.76, "text_no_preprocessing": "and to see so many people;", "text_normalized": "and to see so many people;"} +{"audio_filepath": "audio/roots_02_morris_0219.wav", "text": "and could shoot well in the bow", "duration": 1.82, "text_no_preprocessing": "and could shoot well in the bow,", "text_normalized": "and could shoot well in the bow,"} +{"audio_filepath": "audio/roots_50_morris_0055.wav", "text": "and i know by myself", "duration": 1.74, "text_no_preprocessing": "and I know by myself,", "text_normalized": "and I know by myself,"} +{"audio_filepath": "audio/roots_36_morris_0278.wav", "text": "and stood up facing him", "duration": 1.5, "text_no_preprocessing": "and stood up facing him,", "text_normalized": "and stood 
up facing him,"} +{"audio_filepath": "audio/bambatse_07_haggard_0195.wav", "text": "we offer this gift", "duration": 1.39, "text_no_preprocessing": "\u201cWe offer this gift,\u201d", "text_normalized": "\"We offer this gift,\""} +{"audio_filepath": "audio/presentpictureofnsw_04_mann_0197.wav", "text": "were greatly retarded", "duration": 1.52, "text_no_preprocessing": "were greatly retarded.", "text_normalized": "were greatly retarded."} +{"audio_filepath": "audio/nada_lily_30_haggard_0368.wav", "text": "if you love me and will wed me it is enough", "duration": 3.72, "text_no_preprocessing": "\u201cIf you love me and will wed me, it is enough.", "text_normalized": "If you love me and will wed me, it is enough."} +{"audio_filepath": "audio/presentpictureofnsw_05_mann_0592.wav", "text": "it is not possible for humanity to look upon the probable consequences", "duration": 4.21, "text_no_preprocessing": "it is not possible for humanity to look upon the probable consequences,", "text_normalized": "it is not possible for humanity to look upon the probable consequences,"} +{"audio_filepath": "audio/nada_lily_02_haggard_0216.wav", "text": "they had broken down the ferns with their feet and trampled the grass", "duration": 4.2, "text_no_preprocessing": "they had broken down the ferns with their feet and trampled the grass.", "text_normalized": "they had broken down the ferns with their feet and trampled the grass."} +{"audio_filepath": "audio/swag_05_tompkins_0390.wav", "text": "here and there you happen on the home of the hardy pioneer the man on the land", "duration": 4.4, "text_no_preprocessing": "Here and there you happen on the home of the hardy pioneer\u2014the man on the land.", "text_normalized": "Here and there you happen on the home of the hardy pioneer-the man on the land."} +{"audio_filepath": "audio/nada_lily_18_haggard_0386.wav", "text": "because of their number", "duration": 1.52, "text_no_preprocessing": "because of their number.", "text_normalized": 
"because of their number."} +{"audio_filepath": "audio/roots_40_morris_0157.wav", "text": "and if perchance her hand touched some young man", "duration": 2.72, "text_no_preprocessing": "and if perchance her hand touched some young man,", "text_normalized": "and if perchance her hand touched some young man,"} +{"audio_filepath": "audio/hartmann_04_fawcett_0191.wav", "text": "i felt like the quarry of the wild huntsman of german legend", "duration": 3.84, "text_no_preprocessing": "I felt like the quarry of the wild huntsman of German legend.", "text_normalized": "I felt like the quarry of the wild huntsman of German legend."} +{"audio_filepath": "audio/bambatse_06_haggard_0280.wav", "text": "dressed all in white", "duration": 1.62, "text_no_preprocessing": "dressed all in white.", "text_normalized": "dressed all in white."} +{"audio_filepath": "audio/nada_lily_12_haggard_0523.wav", "text": "and their leaves are so thick that in certain places the light is as that of night when the moon is young", "duration": 5.96, "text_no_preprocessing": "and their leaves are so thick that in certain places the light is as that of night when the moon is young.", "text_normalized": "and their leaves are so thick that in certain places the light is as that of night when the moon is young."} +{"audio_filepath": "audio/roots_36_morris_0138.wav", "text": "but love constrained him", "duration": 1.46, "text_no_preprocessing": "but love constrained him,", "text_normalized": "but love constrained him,"} +{"audio_filepath": "audio/presentpictureofnsw_01_mann_0076.wav", "text": "whose offences against the laws had rendered their further residence in their native land", "duration": 4.76, "text_no_preprocessing": "whose offences against the laws had rendered their further residence in their native land,", "text_normalized": "whose offences against the laws had rendered their further residence in their native land,"} +{"audio_filepath": "audio/swag_04_tompkins_0193.wav", "text": "mentioning the 
number he may expect", "duration": 2.0, "text_no_preprocessing": "mentioning the number he may expect.", "text_normalized": "mentioning the number he may expect."} +{"audio_filepath": "audio/nada_lily_04_haggard_0027.wav", "text": "scores of them old men", "duration": 1.95, "text_no_preprocessing": "scores of them\u2014old men,", "text_normalized": "scores of them-old men,"} +{"audio_filepath": "audio/nada_lily_26_haggard_0157.wav", "text": "she answered hanging her head", "duration": 2.3, "text_no_preprocessing": "she answered, hanging her head;", "text_normalized": "she answered, hanging her head;"} +{"audio_filepath": "audio/hartmann_08_fawcett_0171.wav", "text": "with his machinery", "duration": 1.26, "text_no_preprocessing": "with his machinery.", "text_normalized": "with his machinery."} +{"audio_filepath": "audio/hartmann_11_fawcett_0056.wav", "text": "but would be glad to see me for a moment", "duration": 2.42, "text_no_preprocessing": "but would be glad to see me for a moment.", "text_normalized": "but would be glad to see me for a moment."} +{"audio_filepath": "audio/roots_50_morris_0154.wav", "text": "and saw that it was sober and unsmiling", "duration": 2.65, "text_no_preprocessing": "and saw that it was sober and unsmiling,", "text_normalized": "and saw that it was sober and unsmiling,"} +{"audio_filepath": "audio/bambatse_07_haggard_0054.wav", "text": "so that she may be sure she is told exactly what passes", "duration": 3.19, "text_no_preprocessing": "so that she may be sure she is told exactly what passes?\u201d", "text_normalized": "so that she may be sure she is told exactly what passes?"} +{"audio_filepath": "audio/bambatse_06_haggard_0163.wav", "text": "said old sally when he had vanished walking very fast", "duration": 3.28, "text_no_preprocessing": "said old Sally, when he had vanished, walking very fast.", "text_normalized": "said old Sally, when he had vanished, walking very fast."} +{"audio_filepath": "audio/jaufry_08_lafon_0098.wav", 
"text": "down to the earth he rolled beneath the stroke", "duration": 2.82, "text_no_preprocessing": "Down to the earth he rolled beneath the stroke.", "text_normalized": "Down to the earth he rolled beneath the stroke."} +{"audio_filepath": "audio/nada_lily_15_haggard_0288.wav", "text": "my brother", "duration": 0.8, "text_no_preprocessing": "\u201cMy brother,", "text_normalized": "My brother,"} +{"audio_filepath": "audio/roots_28_morris_0362.wav", "text": "when morning was come they gat them at once to the road", "duration": 3.14, "text_no_preprocessing": "When morning was come they gat them at once to the road;", "text_normalized": "When morning was come they gat them at once to the road;"} +{"audio_filepath": "audio/shortstories_06_gaskell_0784.wav", "text": "and money must go all for a child's nonsense", "duration": 2.96, "text_no_preprocessing": "and money must go all for a child's nonsense.", "text_normalized": "and money must go all for a child's nonsense."} +{"audio_filepath": "audio/roots_04_morris_0400.wav", "text": "so that he had to seek his way diligently", "duration": 2.94, "text_no_preprocessing": "so that he had to seek his way diligently.", "text_normalized": "so that he had to seek his way diligently."} +{"audio_filepath": "audio/jaufry_05_lafon_0217.wav", "text": "spite of his will", "duration": 1.23, "text_no_preprocessing": "Spite of his will,", "text_normalized": "Spite of his will,"} +{"audio_filepath": "audio/nada_lily_30_haggard_0086.wav", "text": "only a spear", "duration": 1.13, "text_no_preprocessing": "only a spear,", "text_normalized": "only a spear,"} +{"audio_filepath": "audio/nada_lily_17_haggard_0378.wav", "text": "and because the slaughterer had named her for his wife", "duration": 4.2, "text_no_preprocessing": "and because the Slaughterer had named her for his wife.", "text_normalized": "and because the Slaughterer had named her for his wife."} +{"audio_filepath": "audio/bambatse_22_haggard_0394.wav", "text": "elfish face of 
benita", "duration": 1.9, "text_no_preprocessing": "elfish face of Benita,", "text_normalized": "elfish face of Benita,"} +{"audio_filepath": "audio/shortstories_05_gaskell_0338.wav", "text": "when what was that", "duration": 1.51, "text_no_preprocessing": "when, what was that?", "text_normalized": "when, what was that?"} +{"audio_filepath": "audio/bambatse_04_haggard_0273.wav", "text": "said the old man", "duration": 1.16, "text_no_preprocessing": "said the old man,", "text_normalized": "said the old man,"} +{"audio_filepath": "audio/shortstories_10_gaskell_0142.wav", "text": "for though she had never read it", "duration": 1.45, "text_no_preprocessing": "for though she had never read it,", "text_normalized": "for though she had never read it,"} +{"audio_filepath": "audio/astralplane_03_leadbeater_0639.wav", "text": "to attain the appalling preeminence in evil which thus involves the entire loss of a personality and the weakening of the developing individuality behind", "duration": 10.08, "text_no_preprocessing": "To attain the appalling preeminence in evil which thus involves the entire loss of a personality and the weakening of the developing individuality behind,", "text_normalized": "To attain the appalling preeminence in evil which thus involves the entire loss of a personality and the weakening of the developing individuality behind,"} +{"audio_filepath": "audio/hartmann_05_fawcett_0100.wav", "text": "i looked long and earnestly", "duration": 2.1, "text_no_preprocessing": "I looked long and earnestly,", "text_normalized": "I looked long and earnestly,"} +{"audio_filepath": "audio/shortstories_06_gaskell_0920.wav", "text": "but latterly because she became really interested to hear of her ways", "duration": 4.1, "text_no_preprocessing": "but latterly because she became really interested to hear of her ways;", "text_normalized": "but latterly because she became really interested to hear of her ways;"} +{"audio_filepath": 
"audio/astralplane_06_leadbeater_0349.wav", "text": "and it would be impossible to detect by any ordinary means a forgery committed in this manner", "duration": 5.76, "text_no_preprocessing": "and it would be impossible to detect by any ordinary means a forgery committed in this manner.", "text_normalized": "and it would be impossible to detect by any ordinary means a forgery committed in this manner."} +{"audio_filepath": "audio/presentpictureofnsw_02_mann_0962.wav", "text": "of the difficulties which surround this task", "duration": 2.39, "text_no_preprocessing": "of the difficulties which surround this task,", "text_normalized": "of the difficulties which surround this task,"} +{"audio_filepath": "audio/bambatse_20_haggard_0146.wav", "text": "bringing her into touch with the world again", "duration": 2.44, "text_no_preprocessing": "bringing her into touch with the world again.", "text_normalized": "bringing her into touch with the world again."} +{"audio_filepath": "audio/nada_lily_04_haggard_0190.wav", "text": "easier to die by water than on iron", "duration": 2.52, "text_no_preprocessing": "\u201cEasier to die by water than on iron,\u201d", "text_normalized": "\"Easier to die by water than on iron,\""} +{"audio_filepath": "audio/roots_44_morris_0140.wav", "text": "and a tall man came tottering down", "duration": 2.38, "text_no_preprocessing": "and a tall man came tottering down;", "text_normalized": "and a tall man came tottering down;"} +{"audio_filepath": "audio/shortstories_06_gaskell_0776.wav", "text": "yet when he got up", "duration": 1.31, "text_no_preprocessing": "yet when he got up,", "text_normalized": "yet when he got up,"} +{"audio_filepath": "audio/roots_19_morris_0120.wav", "text": "thou art become a friend of the wolf", "duration": 2.06, "text_no_preprocessing": "thou art become a friend of the Wolf,", "text_normalized": "thou art become a friend of the Wolf,"} +{"audio_filepath": "audio/glitteringplain_07_morris_0054.wav", "text": "go forth into 
the hall", "duration": 1.58, "text_no_preprocessing": "go forth into the hall,", "text_normalized": "go forth into the hall,"} +{"audio_filepath": "audio/roots_27_morris_0352.wav", "text": "in a week's journey", "duration": 1.19, "text_no_preprocessing": "in a week's journey,", "text_normalized": "in a week's journey,"} +{"audio_filepath": "audio/bambatse_06_haggard_0090.wav", "text": "not in the skies", "duration": 1.12, "text_no_preprocessing": "not in the skies,", "text_normalized": "not in the skies,"} +{"audio_filepath": "audio/bambatse_19_haggard_0301.wav", "text": "and we shall find the gold", "duration": 1.82, "text_no_preprocessing": "and we shall find the gold.", "text_normalized": "and we shall find the gold."} +{"audio_filepath": "audio/roots_32_morris_0506.wav", "text": "and in sooth he was looking for her", "duration": 2.1, "text_no_preprocessing": "and in sooth he was looking for her;", "text_normalized": "and in sooth he was looking for her;"} +{"audio_filepath": "audio/roots_06_morris_0244.wav", "text": "but of silver", "duration": 1.15, "text_no_preprocessing": "but of silver;", "text_normalized": "but of silver;"} +{"audio_filepath": "audio/shortstories_05_gaskell_0539.wav", "text": "and was away before an answer could be given", "duration": 2.56, "text_no_preprocessing": "And was away before an answer could be given.", "text_normalized": "And was away before an answer could be given."} +{"audio_filepath": "audio/jaufry_01_lafon_0175.wav", "text": "beheld it thus his uncle carrying off", "duration": 2.88, "text_no_preprocessing": "beheld it thus his uncle carrying off,", "text_normalized": "beheld it thus his uncle carrying off,"} +{"audio_filepath": "audio/roots_09_morris_0085.wav", "text": "then unto you shall we come aback", "duration": 2.08, "text_no_preprocessing": "Then unto you shall we come aback,", "text_normalized": "Then unto you shall we come aback,"} +{"audio_filepath": "audio/presentpictureofnsw_04_mann_0473.wav", "text": "to the 
loss of eleven pounds five shillings a day to the crown", "duration": 3.6, "text_no_preprocessing": "to the loss of eleven pounds five shillings a day to the crown.", "text_normalized": "to the loss of eleven pounds five shillings a day to the crown."} +{"audio_filepath": "audio/nada_lily_25_haggard_0351.wav", "text": "two spear casts away", "duration": 1.85, "text_no_preprocessing": "two spear-casts away,", "text_normalized": "two spear-casts away,"} +{"audio_filepath": "audio/hartmann_11_fawcett_0337.wav", "text": "hitherto", "duration": 0.86, "text_no_preprocessing": "Hitherto,", "text_normalized": "Hitherto,"} +{"audio_filepath": "audio/nada_lily_15_haggard_0182.wav", "text": "and with them came the wolves", "duration": 2.1, "text_no_preprocessing": "and with them came the wolves.", "text_normalized": "and with them came the wolves."} +{"audio_filepath": "audio/jaufry_06_lafon_0466.wav", "text": "beside the bridge there sat an aged knight", "duration": 2.44, "text_no_preprocessing": "Beside the bridge there sat an aged knight,", "text_normalized": "Beside the bridge there sat an aged knight,"} +{"audio_filepath": "audio/hartmann_12_fawcett_0088.wav", "text": "it was entering the lion's den possibly only to find a victim before becoming one myself", "duration": 6.24, "text_no_preprocessing": "it was entering the lion's den possibly only to find a victim before becoming one myself;", "text_normalized": "it was entering the lion's den possibly only to find a victim before becoming one myself;"} +{"audio_filepath": "audio/swag_06_tompkins_0082.wav", "text": "when of a sudden comes the joyous sound of running water", "duration": 3.23, "text_no_preprocessing": "when of a sudden comes the joyous sound of running water;", "text_normalized": "when of a sudden comes the joyous sound of running water;"} +{"audio_filepath": "audio/glitteringplain_14_morris_0155.wav", "text": "and asked him what he would that they might serve him", "duration": 2.84, "text_no_preprocessing": 
"and asked him what he would that they might serve him.", "text_normalized": "and asked him what he would that they might serve him."} +{"audio_filepath": "audio/hartmann_12_fawcett_0193.wav", "text": "for these tactics might have proved my death warrant", "duration": 3.0, "text_no_preprocessing": "for these tactics might have proved my death-warrant.", "text_normalized": "for these tactics might have proved my death-warrant."} +{"audio_filepath": "audio/astralplane_04_leadbeater_0040.wav", "text": "to mention that in this branch of the subject two reservations have been made", "duration": 4.62, "text_no_preprocessing": "to mention that in this branch of the subject two reservations have been made.", "text_normalized": "to mention that in this branch of the subject two reservations have been made."} +{"audio_filepath": "audio/swag_03_tompkins_0380.wav", "text": "or vice versa", "duration": 1.28, "text_no_preprocessing": "or vice versa.", "text_normalized": "or vice versa."} +{"audio_filepath": "audio/jaufry_09_lafon_0365.wav", "text": "and felony towards our sovereign lord", "duration": 2.68, "text_no_preprocessing": "and felony towards our Sovereign Lord.", "text_normalized": "and felony towards our Sovereign Lord."} +{"audio_filepath": "audio/roots_39_morris_0340.wav", "text": "and there abode in peace the time of their summoning", "duration": 3.36, "text_no_preprocessing": "and there abode in peace the time of their summoning;", "text_normalized": "and there abode in peace the time of their summoning;"} +{"audio_filepath": "audio/glitteringplain_11_morris_0037.wav", "text": "and they clapped their hands together and kissed each other for joy of the new lover", "duration": 5.04, "text_no_preprocessing": "and they clapped their hands together and kissed each other for joy of the new lover;", "text_normalized": "and they clapped their hands together and kissed each other for joy of the new lover;"} +{"audio_filepath": "audio/bambatse_15_haggard_0013.wav", "text": 
"faltered benita", "duration": 1.24, "text_no_preprocessing": "faltered Benita.", "text_normalized": "faltered Benita."} +{"audio_filepath": "audio/hartmann_12_fawcett_0110.wav", "text": "after what seemed ages of waiting came his reply", "duration": 3.72, "text_no_preprocessing": "After what seemed ages of waiting came his reply.", "text_normalized": "After what seemed ages of waiting came his reply."} +{"audio_filepath": "audio/glitteringplain_20_morris_0336.wav", "text": "anyhow it is good enough for a grave", "duration": 2.62, "text_no_preprocessing": "anyhow it is good enough for a grave,", "text_normalized": "anyhow it is good enough for a grave,"} +{"audio_filepath": "audio/jaufry_07_lafon_0073.wav", "text": "her shoulders round and high", "duration": 1.88, "text_no_preprocessing": "her shoulders round and high;", "text_normalized": "her shoulders round and high;"} +{"audio_filepath": "audio/roots_44_morris_0112.wav", "text": "how loudly they cry on the hook for the reaping of men doomed to die", "duration": 4.47, "text_no_preprocessing": "how loudly they cry On the hook for the reaping of men doomed to die!", "text_normalized": "how loudly they cry On the hook for the reaping of men doomed to die!"} diff --git a/wer_testing/Readme.md b/wer_testing/Readme.md new file mode 100644 index 00000000000..88b1308ab05 --- /dev/null +++ b/wer_testing/Readme.md @@ -0,0 +1,15 @@ +# WER Testing For Whisper Cpp + +## Instructions: (Take note, this must be only after you have downloaded and made the model, else you can reference to either YT or the main dir) + +### How it works: +1. Just run ```WER_script.py``` + + +### What it does: +It calculates the WER based off the dataset for speaker 6097 of the HiTTS dataset. 
"""Measure the word error rate (WER) of whisper.cpp models on a manifest.

Each manifest line is a JSON object providing at least ``audio_filepath``
(relative to the sample folder) and ``text`` (the reference transcript).
Every audio file is transcribed with ``whisper-cli`` for each locally
available model / thread count / processor count, and the transcript is
scored against the reference.

Default dataset: speaker 6097 of the HiTTS dataset, see
https://docs.nvidia.com/deeplearning/riva/user-guide/docs/tutorials/tts-evaluate.html
"""
import os
import subprocess
import re
import csv
import wave
import contextlib
import argparse
import json
import math

# Locations are relative to the directory the script is run from.
MODELS_DIR = "../models"
WHISPER_CLI = "../build/bin/whisper-cli"

# Candidate model files; only those present in MODELS_DIR are benchmarked.
models = [
    "ggml-tiny.en.bin",
    "ggml-tiny.bin",
    "ggml-base.en.bin",
    "ggml-base.bin",
    "ggml-small.en.bin",
    "ggml-small.bin",
    "ggml-medium.en.bin",
    "ggml-medium.bin",
    "ggml-large-v1.bin",
    "ggml-large-v2.bin",
    "ggml-large-v3.bin",
    "ggml-large-v3-turbo.bin",
]

# One whisper-cli output line: "[hh:mm:ss.mmm --> hh:mm:ss.mmm]   text".
_SEGMENT_PATTERN = re.compile(
    r'\[\d{2}:\d{2}:\d{2}\.\d{3} --> \d{2}:\d{2}:\d{2}\.\d{3}\]\s+(.*)'
)

# Everything that is not a lowercase letter, digit, apostrophe or space.
_NON_WORD_PATTERN = re.compile(r"[^a-z0-9' ]+")


class ListAction(argparse.Action):
    """Argparse action that parses a comma-separated list of integers."""

    def __call__(self, parser, namespace, values, option_string=None):
        try:
            parsed = [int(val) for val in values.split(",")]
        except ValueError:
            # Report bad input as a normal usage error instead of a traceback.
            raise argparse.ArgumentError(
                self, f"expected comma-separated integers, got {values!r}"
            )
        setattr(namespace, self.dest, parsed)


def build_parser() -> argparse.ArgumentParser:
    """Return the command-line parser for the benchmark."""
    parser = argparse.ArgumentParser(
        description="Benchmark the speech recognition model"
    )
    parser.add_argument(
        "-t",
        "--threads",
        dest="threads",
        action=ListAction,
        default=[4],
        help="List of thread counts to benchmark (comma-separated, default: 4)",
    )
    parser.add_argument(
        "-p",
        "--processors",
        dest="processors",
        action=ListAction,
        default=[1],
        help="List of processor counts to benchmark (comma-separated, default: 1)",
    )
    parser.add_argument(
        "-f",
        "--filename",
        type=str,
        default="./6097_5_mins/",
        help="Folder that the manifest's audio paths are relative to "
        "(default: ./6097_5_mins/)",
    )
    parser.add_argument(
        "-s",
        "--type_set",
        type=str,
        default="./6097_5_mins/manifest.json",
        help="Path to the JSON-lines manifest listing audio files and "
        "reference text (default: ./6097_5_mins/manifest.json)",
    )
    return parser


def check_folder_exists(file: str) -> bool:
    """Return True when *file* is an existing directory."""
    return os.path.isdir(file)


def check_file_exists(file: str) -> bool:
    """Return True when *file* is an existing regular file."""
    return os.path.isfile(file)


def filtered_text(output: str) -> list:
    """Return the text of every timestamped segment found in *output*."""
    return _SEGMENT_PATTERN.findall(output)


def normalize_text(text: str) -> str:
    """Lowercase *text*, drop punctuation and collapse whitespace.

    The manifest's ``text`` field is lowercase without punctuation and with
    hyphens written as spaces, while whisper emits cased, punctuated text.
    Both sides must be brought to the same form before counting word errors.
    Only ASCII letters, digits and apostrophes are kept.
    """
    lowered = text.lower().replace("\u2019", "'")  # typographic apostrophe
    return " ".join(_NON_WORD_PATTERN.sub(" ", lowered).split())


def calculate_wer(text: str, origin_word: str) -> float:
    """Return the word error rate of hypothesis *text* against *origin_word*.

    WER is the word-level edit distance (substitutions, insertions and
    deletions) divided by the number of reference words.  An empty
    hypothesis therefore scores 1.0.  With an empty reference the rate is
    undefined: 0 is returned when the hypothesis is empty too, otherwise
    ``float('inf')``.
    """
    ref_words = origin_word.split()
    hyp_words = text.split()

    if not ref_words:
        return float('inf') if hyp_words else 0

    # Levenshtein distance keeping only the previous row of the table.
    previous = list(range(len(hyp_words) + 1))
    for i, ref_word in enumerate(ref_words, start=1):
        current = [i]
        for j, hyp_word in enumerate(hyp_words, start=1):
            if ref_word == hyp_word:
                current.append(previous[j - 1])
            else:
                current.append(
                    1 + min(previous[j], current[j - 1], previous[j - 1])
                )
        previous = current

    return previous[-1] / len(ref_words)


def load_manifest(path: str) -> list:
    """Read a JSON-lines manifest, ignoring blank lines."""
    with open(path, 'r', encoding="utf-8") as manifest:
        return [json.loads(line) for line in manifest if line.strip()]


def transcribe(model: str, thread: int, processor_count: int,
               sample_file_path: str) -> str:
    """Run whisper-cli on one audio file and return its combined output."""
    cmd = [
        WHISPER_CLI,
        "-m", os.path.join(MODELS_DIR, model),
        "-t", str(thread),
        "-p", str(processor_count),
        "-f", sample_file_path,
    ]
    print("printing out command:")
    print(" ".join(cmd))
    try:
        # An argument list (no shell) keeps odd characters in paths literal,
        # and run() collects all output even if the process exits at once.
        completed = subprocess.run(
            cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, check=False
        )
    except FileNotFoundError as err:
        raise FileNotFoundError(
            f"whisper-cli not found at {WHISPER_CLI}; build the project first"
        ) from err
    return completed.stdout.decode(errors="replace")


def main(argv=None):
    """Run the benchmark and return the overall average WER.

    Returns None when nothing could be scored (no model files found or an
    empty manifest).  Raises FileNotFoundError when the sample folder or
    the whisper-cli binary is missing.
    """
    args = build_parser().parse_args(argv)
    sample_folder = args.filename

    if not check_folder_exists(sample_folder):
        raise FileNotFoundError(f"Sample folder {sample_folder} not found")

    filtered_models = []
    for model in models:
        if check_file_exists(os.path.join(MODELS_DIR, model)):
            filtered_models.append(model)
        else:
            print(f"Model {model} not found, removing from list")

    manifest_data = load_manifest(args.type_set)

    total_wer = 0.0
    scored = 0
    for model in filtered_models:
        for thread in args.threads:
            for processor_count in args.processors:
                run_wer = 0.0
                run_scored = 0
                for entry in manifest_data:
                    audio_filepath = entry['audio_filepath']
                    audio_text = entry['text']
                    sample_file_path = os.path.join(sample_folder, audio_filepath)

                    output = transcribe(
                        model, thread, processor_count, sample_file_path
                    )
                    # Long audio is emitted as several segments; score all.
                    transcript = " ".join(filtered_text(output))
                    print(f"Word transcribed is : {transcript}")
                    print(f"Actual word is: {audio_text}")

                    wer = calculate_wer(
                        normalize_text(transcript), normalize_text(audio_text)
                    )
                    if math.isinf(wer):
                        # Undefined rate; it would poison the average.
                        print(f"Skipping {audio_filepath}: empty reference text")
                        continue
                    print(f"wer for {audio_filepath} is {round(wer, 2)}")
                    run_wer += wer
                    run_scored += 1

                if run_scored:
                    print(
                        f"WER for {model} (threads={thread}, "
                        f"processors={processor_count}) is "
                        f"{round(run_wer / run_scored, 2)}"
                    )
                total_wer += run_wer
                scored += run_scored

    if not scored:
        print("No utterances were scored")
        return None

    # Average over every scored utterance, not just the manifest length,
    # so multiple models or thread counts do not inflate the result.
    final_wer = total_wer / scored
    print(f"Final WER is {round(final_wer, 2)}")
    return final_wer


if __name__ == "__main__":
    main()