@@ -3831,19 +3831,25 @@ struct ggml_cgraph * vocoder_graph(
38313831
38323832 cur = ggml_cpy (ctx0, cur, ggml_new_tensor (ctx0, GGML_TYPE_F32,4 ,cur->ne ));
38333833
3834- int strides[] = {8 ,4 ,4 };
3835- int paddings[] = {4 ,2 ,2 };
3834+ int strides[] = {8 ,8 ,4 };
3835+ int paddings[] = {4 ,4 ,2 };
38363836 int hop_sizes[] = {8 ,64 ,256 };
3837- 3837+ // const int kernel_size = 3;
3838+ // const int dilation = 1;
3839+ const int padding_length = 1 ;
38383840
38393841
38403842 struct ggml_tensor * conditioning;
38413843
38423844 // graph tether
38433845 // res blocks
3844- for (int i =0 ; i < 1 ; i++)
3846+ for (int i =0 ; i < 3 ; i++)
38453847 {
38463848
3849+ 3850+ 3851+ 3852+ 38473853 ggml_tensor * float_32_conv_transpose_1d_weight= ggml_cont (ctx0,ggml_cpy (ctx0, model.residual_stack [i].convolution_t_pre_weight , ggml_new_tensor (ctx0, GGML_TYPE_F32,4 ,model.residual_stack [i].convolution_t_pre_weight ->ne )));
38483854
38493855 cur = ggml_cont (ctx0,ggml_leaky_relu (ctx0, cur, 0.2 , false ));
@@ -3854,7 +3860,7 @@ struct ggml_cgraph * vocoder_graph(
38543860
38553861 cur = ggml_cont (ctx0,ggml_transpose (ctx0,ggml_add (ctx0, ggml_cont (ctx0, ggml_transpose (ctx0, cur)), model.residual_stack [i].convolution_t_pre_bias )));
38563862
3857-
3863+ 38583864
38593865 conditioning = ggml_cpy (ctx0, padded_mel, ggml_new_tensor (ctx0, GGML_TYPE_F32,4 ,padded_mel->ne ));
38603866
@@ -3962,21 +3968,26 @@ struct ggml_cgraph * vocoder_graph(
39623968
39633969 for (int c = 0 ; c < 4 ; c++)
39643970 {
3971+ 39653972 output = ggml_leaky_relu (ctx0, cur, 0.2 , false );
39663973
3974+ 39673975 float_16_conv_1d_weight= ggml_cpy (ctx0, model.residual_stack [i].conv_blocks [c].conv_block_1_weight , ggml_new_tensor (ctx0, GGML_TYPE_F16,4 ,model.residual_stack [i].conv_blocks [c].conv_block_1_weight ->ne ));
39683976
39693977 output = ggml_cont (ctx0,ggml_conv_1d (ctx0, float_16_conv_1d_weight, output, 1 ,conv_block_paddings[c],conv_block_dilations[c]));
39703978
39713979
39723980 output = ggml_cpy (ctx0, output, ggml_new_tensor (ctx0, GGML_TYPE_F32,4 ,output->ne ));
39733981
3982+ 39743983
39753984 output = ggml_cont (ctx0,ggml_transpose (ctx0,ggml_add (ctx0, ggml_cont (ctx0, ggml_transpose (ctx0, output)), model.residual_stack [i].conv_blocks [c].conv_block_1_bias )));
39763985
39773986
39783987 output = ggml_cpy (ctx0, output, ggml_new_tensor (ctx0, GGML_TYPE_F32,4 ,output->ne ));
39793988
3989+ 3990+ 39803991 output = ggml_leaky_relu (ctx0, output, 0.2 , false );
39813992
39823993 k = ggml_cont (ctx0,ggml_view_3d (ctx0, kernels, kernels->ne [0 ], 6144 , 1 , kernels->nb [1 ], kernels->nb [2 ], c * kernels->ne [0 ] * 6144 * sizeof (float )));
@@ -3987,7 +3998,13 @@ struct ggml_cgraph * vocoder_graph(
39873998
39883999 output = ggml_pad_ext (ctx0, output, 1 ,1 ,0 ,0 ,0 ,0 ,0 ,0 );
39894000
3990- output = ggml_unfold_1d (ctx0, output, 10 , 8 );
4001+ std::cout << " output shape" << std::endl;
4002+ std::cout << output->ne [0 ] << " ," << output->ne [1 ] << " ," << output->ne [2 ] << " ," << output->ne [3 ] << std::endl;
4003+ std::cout << hop_sizes[i] + 2 * padding_length << " ," << hop_sizes[i] << std::endl;
4004+ 4005+ output = ggml_unfold_1d (ctx0, output, hop_sizes[i] + 2 * padding_length, hop_sizes[i]);
4006+ 4007+ 39914008
39924009 output = ggml_unfold_1d (ctx0, output, 1 , 1 );
39934010
@@ -3996,49 +4013,85 @@ struct ggml_cgraph * vocoder_graph(
39964013
39974014 const int output_length = output->ne [2 ];
39984015
3999- output = ggml_reshape_3d (ctx0, output, 10 , 1 , output_length * 32 );
4016+ 4017+ 4018+ output = ggml_reshape_3d (ctx0, output, hop_sizes[i] + 2 * padding_length, 1 , output_length * 32 );
40004019
40014020 output = ggml_unfold_1d (ctx0, output, 3 ,1 );
40024021
4022+ 4023+ 4024+ 40034025
40044026 // o = torch.einsum('bildsk,biokl->bolsd', x, kernel)
40054027
40064028
4007- output = ggml_reshape_4d (ctx0, output, 3 , 8 , output_length, 32 );
4029+ output = ggml_reshape_4d (ctx0, output, 3 , hop_sizes[i] , output_length, 32 );
40084030
40094031 reshaped_kernel = ggml_reshape_4d (ctx0, k, output_length, 3 , 64 , 32 );
40104032
40114033 reshaped_kernel = ggml_cont (ctx0, ggml_permute (ctx0, reshaped_kernel, 2 ,0 ,1 ,3 ));
40124034
4035+ 4036+ 4037+ 40134038 output = ggml_mul_mat (ctx0, reshaped_kernel, output);
40144039
4015- output_accumulator = ggml_cont (ctx0,ggml_view_4d (ctx0, output, 64 , 8 , output_length,
4016- 1 , output->nb [1 ], output->nb [2 ], output->nb [3 ], 0 * output_length * 64 * 8 * sizeof (float ) ));
4040+ 4041+ 4042+ 4043+ output_accumulator = ggml_cont (ctx0,ggml_view_4d (ctx0, output, 64 , hop_sizes[i], output_length,
4044+ 1 , output->nb [1 ], output->nb [2 ], output->nb [3 ], 0 * output_length * 64 * hop_sizes[i] * sizeof (float ) ));
40174045 for (int j = 1 ; j < 32 ; j++ )
40184046 {
4019- output_accumulator = ggml_add (ctx0, output_accumulator, ggml_cont (ctx0,ggml_view_4d (ctx0, output, 64 , 8 , output_length,
4020- 1 , output->nb [1 ], output->nb [2 ], output->nb [3 ], j * output_length * 64 * 8 * sizeof (float ) )));
4047+ output_accumulator = ggml_add (ctx0, output_accumulator, ggml_cont (ctx0,ggml_view_4d (ctx0, output, 64 , hop_sizes[i] , output_length,
4048+ 1 , output->nb [1 ], output->nb [2 ], output->nb [3 ], j * output_length * 64 * hop_sizes[i] * sizeof (float ) )));
40214049 }
40224050
4051+ 4052+ 40234053
40244054 output = ggml_cont (ctx0,ggml_permute (ctx0, output_accumulator, 3 ,1 ,2 ,0 ));
40254055
4056+ 4057+ 40264058 output = ggml_add (ctx0, output, ggml_reshape_4d (ctx0, b, 1 ,1 , output_length, 64 ));
40274059
4028- output = ggml_reshape_3d (ctx0, output,1 , 8 *output_length, 64 );
4060+ output = ggml_reshape_3d (ctx0, output,1 , hop_sizes[i] *output_length, 64 );
40294061
4062+ /*
4063+ if (i >= 1)
4064+ {
4065+ cur = output;
4066+ goto end;
4067+ }*/
40304068
4031- output_half_1 = ggml_sigmoid (ctx0,ggml_cont (ctx0,ggml_view_3d (ctx0, output, 1 , 8 *output_length,32 , output->nb [1 ], output->nb [2 ], 0 * 1 * 8 * output_length * 32 * sizeof (float ) )));
4032- output_half_2 = ggml_tanh (ctx0,ggml_cont (ctx0,ggml_view_3d (ctx0, output, 1 , 8 *output_length,32 , output->nb [1 ], output->nb [2 ], 1 * 1 * 8 * output_length * 32 * sizeof (float ) )));
4069+ output_half_1 = ggml_sigmoid (ctx0,ggml_cont (ctx0,ggml_view_3d (ctx0, output, 1 , hop_sizes[i] *output_length,32 , output->nb [1 ], output->nb [2 ], 0 * 1 * hop_sizes[i] * output_length * 32 * sizeof (float ) )));
4070+ output_half_2 = ggml_tanh (ctx0,ggml_cont (ctx0,ggml_view_3d (ctx0, output, 1 , hop_sizes[i] *output_length,32 , output->nb [1 ], output->nb [2 ], 1 * 1 * hop_sizes[i] * output_length * 32 * sizeof (float ) )));
40334071
4034- cur = ggml_add (ctx0, cur, ggml_reshape_4d (ctx0,ggml_mul (ctx0,output_half_1, output_half_2), output_length * 8 , 32 ,1 ,1 ));
4072+ cur = ggml_add (ctx0, cur, ggml_reshape_4d (ctx0,ggml_mul (ctx0,output_half_1, output_half_2), output_length * hop_sizes[i] , 32 ,1 ,1 ));
40354073
40364074
40374075 }
40384076
40394077
40404078 }
40414079
4080+ cur = ggml_leaky_relu (ctx0, cur, 0.2 , false );
4081+ 4082+ float_16_conv_1d_weight= ggml_cpy (ctx0, model.convolution_post_weight , ggml_new_tensor (ctx0, GGML_TYPE_F16,4 ,model.convolution_post_weight ->ne ));
4083+ cur = ggml_cont (ctx0,ggml_conv_1d (ctx0, float_16_conv_1d_weight, cur, 1 ,0 ,1 ));
4084+ 4085+ 4086+ cur = ggml_cpy (ctx0, cur, ggml_new_tensor (ctx0, GGML_TYPE_F32,4 ,cur->ne ));
4087+ 4088+ 4089+ cur = ggml_cont (ctx0,ggml_transpose (ctx0,ggml_add (ctx0, ggml_cont (ctx0, ggml_transpose (ctx0, cur)), model.convolution_post_bias )));
4090+ 4091+ 4092+ cur = ggml_cpy (ctx0, cur, ggml_new_tensor (ctx0, GGML_TYPE_F32,4 ,cur->ne ));
4093+ 4094+ 40424095
40434096 ggml_build_forward_expand (gf, cur);
40444097 ggml_set_name (cur, " vocoder_output" );
@@ -4439,6 +4492,54 @@ std::vector<int> process_logits_and_sample(ggml_cgraph * gf, std::vector<int> &
44394492}
44404493
44414494
// Write a mono 32-bit IEEE-float WAV file.
//
// @param filename    path of the output file (overwritten if it exists)
// @param data        audio samples, nominally in [-1.0, 1.0]
// @param sampleRate  sample rate in Hz (e.g. 24000)
//
// The header is written with fixed-width types (int16_t/uint32_t) so each
// field has exactly the byte width the RIFF spec requires, instead of
// dumping the low bytes of a plain `int` (which only happened to work on
// little-endian targets). NOTE(review): field VALUES are still emitted in
// host byte order; WAV requires little-endian, so a big-endian host would
// need explicit byte swapping — same limitation as the original code.
void writeWav (const char * filename, const std::vector<float>& data, int sampleRate) {
    // Format parameters: mono, 32-bit float PCM (WAVE_FORMAT_IEEE_FLOAT).
    const int16_t numChannels   = 1;
    const int16_t bitsPerSample = 32;
    const int16_t blockAlign    = numChannels * bitsPerSample / 8;
    const int32_t byteRate      = sampleRate * blockAlign;

    // Use unsigned 32-bit sizes so large buffers don't overflow a signed int.
    const uint32_t dataSize = static_cast<uint32_t>(data.size() * sizeof(float));
    const uint32_t fileSize = 36 + dataSize; // total file size minus the 8-byte RIFF preamble

    std::ofstream outFile(filename, std::ios::binary);
    if (!outFile.is_open()) {
        std::cerr << "Error opening output file." << std::endl;
        return;
    }

    // RIFF header.
    outFile.write("RIFF", 4);
    outFile.write(reinterpret_cast<const char *>(&fileSize), 4);
    outFile.write("WAVE", 4);

    // "fmt " subchunk (16 bytes, format tag 3 = IEEE float).
    outFile.write("fmt ", 4);
    const int32_t fmtSize     = 16;
    const int16_t audioFormat = 3;
    outFile.write(reinterpret_cast<const char *>(&fmtSize), 4);
    outFile.write(reinterpret_cast<const char *>(&audioFormat), 2);
    outFile.write(reinterpret_cast<const char *>(&numChannels), 2);
    const int32_t sampleRate32 = sampleRate;
    outFile.write(reinterpret_cast<const char *>(&sampleRate32), 4);
    outFile.write(reinterpret_cast<const char *>(&byteRate), 4);
    outFile.write(reinterpret_cast<const char *>(&blockAlign), 2);
    outFile.write(reinterpret_cast<const char *>(&bitsPerSample), 2);

    // "data" subchunk followed by the raw samples.
    outFile.write("data", 4);
    outFile.write(reinterpret_cast<const char *>(&dataSize), 4);
    outFile.write(reinterpret_cast<const char *>(data.data()), dataSize);

    outFile.close();
    // Surface write failures instead of silently reporting success.
    if (!outFile) {
        std::cerr << "Error writing WAV data." << std::endl;
        return;
    }

    std::cout << "WAV file saved successfully." << std::endl;
}
44424543
44434544
44444545
@@ -5869,7 +5970,7 @@ int main(int argc, char ** argv) {
58695970
58705971
58715972
5872- /*
5973+ 58735974 gpt_vocab vocab;
58745975 gpt_vocab_init (" ../models/tokenizer.json" , vocab);
58755976
@@ -5879,7 +5980,8 @@ int main(int argc, char ** argv) {
58795980 // exit(0);
58805981
58815982
5882-
5983+ // std::vector<gpt_vocab::id> tokens = ::parse_tokens_from_string("255,147,2,54,2,14,2,33,218,2,26,61,150,112,0,0", ','); // "This is a test message"
5984+ 58835985 std::vector<gpt_vocab::id> tokens = ::parse_tokens_from_string (" 255,15,55,49,9,9,9,2,134,16,51,31,2,19,46,18,176,13,0,0" , ' ,' ); // "Based... Dr. Freeman?"
58845986
58855987
@@ -5899,9 +6001,9 @@ int main(int argc, char ** argv) {
58996001 save_f32_vector (" ./logs/mel.bin" , mel);
59006002 std::cout << mel.size () <<std::endl;
59016003
5902- */
6004+ 59036005
5904- std::vector<float > mel = load_f32_vector (" ./logs/mel.bin" , 187 * 100 * sizeof (float ));
6006+ // std::vector<float> mel = load_f32_vector("./logs/mel.bin", 187 * 100 * sizeof(float));
59056007
59066008
59076009 std::string vocoder_model_file_path = " ../models/ggml-vocoder-model.bin" ;
@@ -5979,6 +6081,13 @@ int main(int argc, char ** argv) {
59796081 print_all_tensors (vocoder_gf, false , true , " vocoder_output" );
59806082 print_all_tensors (vocoder_gf, true , true , " vocoder_output" );
59816083
6084+ std::vector<float > audio = std::vector<float >();
6085+ 6086+ extract_tensor_to_vector ( vocoder_gf->nodes [vocoder_gf->n_nodes -1 ] , audio);
6087+ 6088+ 6089+ writeWav (" based?.wav" , audio , 24000 );
6090+ 59826091
59836092
59846093 ggml_gallocr_free (vocoder_allocr);
0 commit comments