Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 14dec0c

Browse files
CISC and ngxson authored
main: use jinja chat template system prompt by default (ggml-org#12118)
* Use jinja chat template system prompt by default * faster conditional order * remove nested ternary --------- Co-authored-by: Xuan Son Nguyen <son@huggingface.co>
1 parent 1782cdf commit 14dec0c

File tree

1 file changed

+20
-11
lines changed

1 file changed

+20
-11
lines changed

‎examples/main/main.cpp‎

Lines changed: 20 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -31,8 +31,6 @@
3131
#pragma warning(disable: 4244 4267) // possible loss of data
3232
#endif
3333

34-
static const char * DEFAULT_SYSTEM_MESSAGE = "You are a helpful assistant";
35-
3634
static llama_context ** g_ctx;
3735
static llama_model ** g_model;
3836
static common_sampler ** g_smpl;
@@ -267,6 +265,7 @@ int main(int argc, char ** argv) {
267265

268266
std::vector<llama_token> embd_inp;
269267

268+
bool waiting_for_first_input = params.conversation_mode && params.enable_chat_template && params.system_prompt.empty();
270269
auto chat_add_and_format = [&chat_msgs, &chat_templates](const std::string & role, const std::string & content) {
271270
common_chat_msg new_msg;
272271
new_msg.role = role;
@@ -278,11 +277,20 @@ int main(int argc, char ** argv) {
278277
};
279278

280279
{
281-
auto prompt = (params.conversation_mode && params.enable_chat_template)
282-
// format the system prompt in conversation mode (fallback to default if empty)
283-
? chat_add_and_format("system", params.system_prompt.empty() ? DEFAULT_SYSTEM_MESSAGE : params.system_prompt)
280+
std::string prompt;
281+
282+
if (params.conversation_mode && params.enable_chat_template) {
283+
// format the system prompt in conversation mode (will use template default if empty)
284+
prompt = params.system_prompt;
285+
286+
if (!prompt.empty()) {
287+
prompt = chat_add_and_format("system", prompt);
288+
}
289+
} else {
284290
// otherwise use the prompt as is
285-
: params.prompt;
291+
prompt = params.prompt;
292+
}
293+
286294
if (params.interactive_first || !params.prompt.empty() || session_tokens.empty()) {
287295
LOG_DBG("tokenize the prompt\n");
288296
embd_inp = common_tokenize(ctx, prompt, true, true);
@@ -296,7 +304,7 @@ int main(int argc, char ** argv) {
296304
}
297305

298306
// Should not run without any tokens
299-
if (embd_inp.empty()) {
307+
if (!params.conversation_mode && embd_inp.empty()) {
300308
if (add_bos) {
301309
embd_inp.push_back(llama_vocab_bos(vocab));
302310
LOG_WRN("embd_inp was considered empty and bos was added: %s\n", string_from(ctx, embd_inp).c_str());
@@ -777,7 +785,7 @@ int main(int argc, char ** argv) {
777785
}
778786

779787
// deal with end of generation tokens in interactive mode
780-
if (llama_vocab_is_eog(vocab, common_sampler_last(smpl))) {
788+
if (!waiting_for_first_input && llama_vocab_is_eog(vocab, common_sampler_last(smpl))) {
781789
LOG_DBG("found an EOG token\n");
782790

783791
if (params.interactive) {
@@ -797,12 +805,12 @@ int main(int argc, char ** argv) {
797805
}
798806

799807
// if current token is not EOG, we add it to current assistant message
800-
if (params.conversation_mode) {
808+
if (params.conversation_mode && !waiting_for_first_input) {
801809
const auto id = common_sampler_last(smpl);
802810
assistant_ss << common_token_to_piece(ctx, id, false);
803811
}
804812

805-
if (n_past > 0 && is_interacting) {
813+
if ((n_past > 0 || waiting_for_first_input) && is_interacting) {
806814
LOG_DBG("waiting for user input\n");
807815

808816
if (params.conversation_mode) {
@@ -892,11 +900,12 @@ int main(int argc, char ** argv) {
892900
input_echo = false; // do not echo this again
893901
}
894902

895-
if (n_past > 0) {
903+
if (n_past > 0 || waiting_for_first_input) {
896904
if (is_interacting) {
897905
common_sampler_reset(smpl);
898906
}
899907
is_interacting = false;
908+
waiting_for_first_input = false;
900909
}
901910
}
902911

0 commit comments

Comments
(0)

Page converted by AltStyle (-> original) /