From 44e18ef93995f3040660750b527e5becf85899d0 Mon Sep 17 00:00:00 2001 From: Jeff Bolz Date: Sat, 18 Jan 2025 02:26:50 -0600 Subject: [PATCH 1/9] vulkan: fix coopmat2 flash attention for non-contiguous inputs (#11281) Add code similar to mul_mm_cm2 to force alignment of strides, to avoid a performance regression. Add noncontiguous FA tests in test-backend-ops. Fixes #11268. --- ggml/src/ggml-vulkan/ggml-vulkan.cpp | 43 ++++++++++++++++--- .../vulkan-shaders/flash_attn_cm2.comp | 20 +++++++++ tests/test-backend-ops.cpp | 31 ++++++++++--- 3 files changed, 82 insertions(+), 12 deletions(-) diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vulkan.cpp index 8e3e91495..437e9cdcc 100644 --- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp +++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp @@ -386,10 +386,13 @@ struct vk_flash_attn_push_constants { uint32_t nev3; uint32_t nem1; + uint32_t nb01; uint32_t nb02; uint32_t nb03; + uint32_t nb11; uint32_t nb12; uint32_t nb13; + uint32_t nb21; uint32_t nb22; uint32_t nb23; uint32_t nb31; @@ -4809,7 +4812,14 @@ static void ggml_vk_flash_attn(ggml_backend_vk_context * ctx, vk_context& subctx } assert(pipelines); - bool aligned = (KV % pipelines[1]->align) == 0; + const uint32_t q_stride = (uint32_t)(nbq1 / ggml_type_size(q->type)); + const uint32_t k_stride = (uint32_t)(nbk1 / ggml_type_size(k->type)); + const uint32_t v_stride = (uint32_t)(nbv1 / ggml_type_size(v->type)); + + bool aligned = (KV % pipelines[1]->align) == 0 && + // the "aligned" shader variant will forcibly align strides, for performance + (q_stride & 7) == 0 && (k_stride & 7) == 0 && (v_stride & 7) == 0; + vk_pipeline pipeline = pipelines[aligned]; assert(pipeline); @@ -4845,15 +4855,15 @@ static void ggml_vk_flash_attn(ggml_backend_vk_context * ctx, vk_context& subctx if (ctx->device->uma) { ggml_vk_host_get(ctx->device, q->data, d_Q, q_buf_offset); - ggml_vk_host_get(ctx->device, k->data, d_K, q_buf_offset); - ggml_vk_host_get(ctx->device, v->data, d_V, 
q_buf_offset); - ggml_vk_host_get(ctx->device, dst->data, d_D, q_buf_offset); + ggml_vk_host_get(ctx->device, k->data, d_K, k_buf_offset); + ggml_vk_host_get(ctx->device, v->data, d_V, v_buf_offset); + ggml_vk_host_get(ctx->device, dst->data, d_D, d_buf_offset); Q_uma = d_Q != nullptr; K_uma = d_K != nullptr; V_uma = d_V != nullptr; D_uma = d_D != nullptr; if (mask) { - ggml_vk_host_get(ctx->device, mask->data, d_M, q_buf_offset); + ggml_vk_host_get(ctx->device, mask->data, d_M, m_buf_offset); M_uma = d_M != nullptr; } } @@ -4891,7 +4901,18 @@ static void ggml_vk_flash_attn(ggml_backend_vk_context * ctx, vk_context& subctx } } - const vk_flash_attn_push_constants pc = { N, KV, (uint32_t)ne1, (uint32_t)ne2, (uint32_t)ne3, (uint32_t)neq2, (uint32_t)neq3, (uint32_t)nek2, (uint32_t)nek3, (uint32_t)nev2, (uint32_t)nev3, nem1, (uint32_t)nbq2, (uint32_t)nbq3, (uint32_t)nbk2, (uint32_t)nbk3, (uint32_t)nbv2, (uint32_t)nbv3, nbm1, scale, max_bias, logit_softcap, mask != nullptr, n_head_log2, m0, m1 }; + const vk_flash_attn_push_constants pc = { N, KV, + (uint32_t)ne1, (uint32_t)ne2, (uint32_t)ne3, + (uint32_t)neq2, (uint32_t)neq3, + (uint32_t)nek2, (uint32_t)nek3, + (uint32_t)nev2, (uint32_t)nev3, + nem1, + q_stride, (uint32_t)nbq2, (uint32_t)nbq3, + k_stride, (uint32_t)nbk2, (uint32_t)nbk3, + v_stride, (uint32_t)nbv2, (uint32_t)nbv3, + nbm1, + scale, max_bias, logit_softcap, + mask != nullptr, n_head_log2, m0, m1 }; ggml_vk_dispatch_pipeline(ctx, subctx, pipeline, { vk_subbuffer{d_Q, q_buf_offset, VK_WHOLE_SIZE}, @@ -8668,6 +8689,7 @@ static void ggml_vk_check_results_1(ggml_tensor * tensor) { ggml_tensor * src0 = tensor->src[0]; ggml_tensor * src1 = tensor->src[1]; ggml_tensor * src2 = tensor->src[2]; + ggml_tensor * src3 = tensor->src[3]; void * tensor_data = tensor->data; @@ -8730,6 +8752,9 @@ static void ggml_vk_check_results_1(ggml_tensor * tensor) { if (src2 != nullptr) { std::cerr << "src2=" << src2 << " src2->name=" << src2->name << " op=" << ggml_op_name(src2->op) 
<< " type=" << ggml_type_name(src2->type) << " ne0=" << src2->ne[0] << " nb0=" << src2->nb[0] << " ne1=" << src2->ne[1] << " nb1=" << src2->nb[1] << " ne2=" << src2->ne[2] << " nb2=" << src2->nb[2] << " ne3=" << src2->ne[3] << " nb3=" << src2->nb[3] << " offset=" << src2->view_offs << std::endl; } + if (src3 != nullptr) { + std::cerr << "src3=" << src3 << " src3->name=" << src3->name << " op=" << ggml_op_name(src3->op) << " type=" << ggml_type_name(src3->type) << " ne0=" << src3->ne[0] << " nb0=" << src3->nb[0] << " ne1=" << src3->ne[1] << " nb1=" << src3->nb[1] << " ne2=" << src3->ne[2] << " nb2=" << src3->nb[2] << " ne3=" << src3->ne[3] << " nb3=" << src3->nb[3] << " offset=" << src3->view_offs << std::endl; + } std::cerr << "First error: result=" << first_error_result << " correct=" << first_error_correct << " i3=" << first_error[3] << " i2=" << first_error[2] << " i1=" << first_error[1] << " i0=" << first_error[0] << std::endl; std::cerr << std::endl << "Result:" << std::endl; ggml_vk_print_tensor_area(tensor, tensor_data, i0, i1, i2, i3); @@ -8774,6 +8799,9 @@ static void ggml_vk_check_results_1(ggml_tensor * tensor) { if (src2 != nullptr) { std::cerr << "src2=" << src2 << " op=" << ggml_op_name(src2->op) << " type=" << ggml_type_name(src2->type) << " ne0=" << src2->ne[0] << " nb0=" << src2->nb[0] << " ne1=" << src2->ne[1] << " nb1=" << src2->nb[1] << " ne2=" << src2->ne[2] << " nb2=" << src2->nb[2] << " ne3=" << src2->ne[3] << " nb3=" << src2->nb[3] << " offset=" << src2->view_offs << std::endl; } + if (src3 != nullptr) { + std::cerr << "src3=" << src3 << " op=" << ggml_op_name(src3->op) << " type=" << ggml_type_name(src3->type) << " ne0=" << src3->ne[0] << " nb0=" << src3->nb[0] << " ne1=" << src3->ne[1] << " nb1=" << src3->nb[1] << " ne2=" << src3->ne[2] << " nb2=" << src3->nb[2] << " ne3=" << src3->ne[3] << " nb3=" << src3->nb[3] << " offset=" << src3->view_offs << std::endl; + } std::cerr << "First error: result=" << first_error_result << " correct=" << 
first_error_correct << " i3=" << first_error[3] << " i2=" << first_error[2] << " i1=" << first_error[1] << " i0=" << first_error[0] << std::endl; std::cerr << std::endl << "Result:" << std::endl; ggml_vk_print_tensor_area(tensor, tensor_data, 5, 5, 0, 0); @@ -8796,6 +8824,9 @@ static void ggml_vk_check_results_1(ggml_tensor * tensor) { if (src2 != nullptr) { std::cerr << "src2=" << src2 << " op=" << ggml_op_name(src2->op) << " type=" << ggml_type_name(src2->type) << " ne0=" << src2->ne[0] << " nb0=" << src2->nb[0] << " ne1=" << src2->ne[1] << " nb1=" << src2->nb[1] << " ne2=" << src2->ne[2] << " nb2=" << src2->nb[2] << " ne3=" << src2->ne[3] << " nb3=" << src2->nb[3] << " offset=" << src2->view_offs << std::endl; } + if (src3 != nullptr) { + std::cerr << "src3=" << src3 << " op=" << ggml_op_name(src3->op) << " type=" << ggml_type_name(src3->type) << " ne0=" << src3->ne[0] << " nb0=" << src3->nb[0] << " ne1=" << src3->ne[1] << " nb1=" << src3->nb[1] << " ne2=" << src3->ne[2] << " nb2=" << src3->nb[2] << " ne3=" << src3->ne[3] << " nb3=" << src3->nb[3] << " offset=" << src3->view_offs << std::endl; + } std::cerr << "First error: result=" << first_error_result << " correct=" << first_error_correct << " i3=" << first_error[3] << " i2=" << first_error[2] << " i1=" << first_error[1] << " i0=" << first_error[0] << std::endl; std::cerr << std::endl << "Result:" << std::endl; ggml_vk_print_tensor_area(tensor, tensor_data, first_error[0], first_error[1], first_error[2], first_error[3]); diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm2.comp b/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm2.comp index c5be8131b..ca3a59b8f 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm2.comp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm2.comp @@ -42,10 +42,13 @@ layout (push_constant) uniform parameter { uint32_t nev3; uint32_t nem1; + uint32_t nb01; uint32_t nb02; uint32_t nb03; + uint32_t nb11; uint32_t nb12; uint32_t nb13; + uint32_t nb21; uint32_t 
nb22; uint32_t nb23; uint32_t nb31; @@ -146,6 +149,23 @@ void main() { tensorLayoutK = setTensorLayoutDimensionNV(tensorLayoutK, KV, D); tensorLayoutV = setTensorLayoutDimensionNV(tensorLayoutV, KV, D); + // nb?1 are already divided by the type size and are in units of elements + uint32_t q_stride = p.nb01; + uint32_t k_stride = p.nb11; + uint32_t v_stride = p.nb21; + // hint to the compiler that strides are aligned for the aligned variant of the shader + if (Clamp != gl_CooperativeMatrixClampModeConstantNV) + { + q_stride &= ~7; +#if !defined(BLOCK_SIZE) + k_stride &= ~7; + v_stride &= ~7; +#endif + } + tensorLayoutQ = setTensorLayoutStrideNV(tensorLayoutQ, q_stride, 1); + tensorLayoutK = setTensorLayoutStrideNV(tensorLayoutK, k_stride, 1); + tensorLayoutV = setTensorLayoutStrideNV(tensorLayoutV, v_stride, 1); + coopmat Q; coopmat Qf16; diff --git a/tests/test-backend-ops.cpp b/tests/test-backend-ops.cpp index 5cde8289f..74d1bee39 100644 --- a/tests/test-backend-ops.cpp +++ b/tests/test-backend-ops.cpp @@ -3046,9 +3046,10 @@ struct test_flash_attn_ext : public test_case { const float logit_softcap; // Gemma 2 const ggml_type type_KV; + std::array permute; std::string vars() override { - return VARS_TO_STR8(hs, nh, kv, nb, mask, max_bias, logit_softcap, type_KV); + return VARS_TO_STR9(hs, nh, kv, nb, mask, max_bias, logit_softcap, type_KV, permute); } double max_nmse_err() override { @@ -3063,19 +3064,33 @@ struct test_flash_attn_ext : public test_case { } test_flash_attn_ext(int64_t hs = 128, int64_t nh = 32, int64_t kv = 96, int64_t nb = 8, - bool mask = true, float max_bias = 0.0f, float logit_softcap = 0.0f, ggml_type type_KV = GGML_TYPE_F16) - : hs(hs), nh(nh), kv(kv), nb(nb), mask(mask), max_bias(max_bias), logit_softcap(logit_softcap), type_KV(type_KV) {} + bool mask = true, float max_bias = 0.0f, float logit_softcap = 0.0f, ggml_type type_KV = GGML_TYPE_F16, + std::array permute = {0, 1, 2, 3}) + : hs(hs), nh(nh), kv(kv), nb(nb), mask(mask), 
max_bias(max_bias), logit_softcap(logit_softcap), type_KV(type_KV), permute(permute) {} ggml_tensor * build_graph(ggml_context * ctx) override { const int64_t hs_padded = GGML_PAD(hs, ggml_blck_size(type_KV)); - ggml_tensor * q = ggml_new_tensor_4d(ctx, GGML_TYPE_F32, hs_padded, nb, nh, 1); + auto const &create_permuted = [&](ggml_type type, int64_t ne0, int64_t ne1, int64_t ne2, int64_t ne3) -> ggml_tensor * { + int64_t ne[4] = {ne0, ne1, ne2, ne3}; + int64_t ne_perm[4]; + for (int i = 0; i < 4; ++i) { + ne_perm[permute[i]] = ne[i]; + } + ggml_tensor * t = ggml_new_tensor_4d(ctx, type, ne_perm[0], ne_perm[1], ne_perm[2], ne_perm[3]); + if (permute != std::array{0, 1, 2, 3}) { + t = ggml_permute(ctx, t, permute[0], permute[1], permute[2], permute[3]); + } + return t; + }; + + ggml_tensor * q = create_permuted(GGML_TYPE_F32, hs_padded, nb, nh, 1); ggml_set_name(q, "q"); - ggml_tensor * k = ggml_new_tensor_4d(ctx, type_KV, hs_padded, kv, nh, 1); + ggml_tensor * k = create_permuted(type_KV, hs_padded, kv, nh, 1); ggml_set_name(k, "k"); - ggml_tensor * v = ggml_new_tensor_4d(ctx, type_KV, hs_padded, kv, nh, 1); + ggml_tensor * v = create_permuted(type_KV, hs_padded, kv, nh, 1); ggml_set_name(v, "v"); ggml_tensor * m = nullptr; @@ -4167,6 +4182,10 @@ static std::vector> make_test_cases_eval() { for (int nb : { 1, 3, 32, 35, }) { for (ggml_type type_KV : {GGML_TYPE_F16, GGML_TYPE_BF16, GGML_TYPE_Q8_0, GGML_TYPE_Q4_0}) { test_cases.emplace_back(new test_flash_attn_ext(hs, nh, kv, nb, mask, max_bias, logit_softcap, type_KV)); + // run fewer test cases permuted + if (mask == true && max_bias == 0.0f && logit_softcap == 0 && kv == 512) { + test_cases.emplace_back(new test_flash_attn_ext(hs, nh, kv, nb, mask, max_bias, logit_softcap, type_KV, {0, 2, 1, 3})); + } } } } From 6390a998bfc63241fde8509022b0768a71bf20bb Mon Sep 17 00:00:00 2001 From: LostRuins Concedo <39025047+LostRuins@users.noreply.github.com> Date: Sat, 18 Jan 2025 18:20:57 +0800 Subject: [PATCH 2/9] tts : add 
guide tokens support (#11186) * Added the ability to use guide tokens for OuteTTS, greatly improving TTS recitation accuracy over long input sequences. * applied linting suggestions, updated to latest llama_vocab changes, added a safety check, added newline to guide token start --- common/arg.cpp | 7 +++++++ common/common.h | 2 ++ examples/tts/tts.cpp | 45 +++++++++++++++++++++++++++++++++++++++++++- 3 files changed, 53 insertions(+), 1 deletion(-) diff --git a/common/arg.cpp b/common/arg.cpp index 9069950eb..dede335fb 100644 --- a/common/arg.cpp +++ b/common/arg.cpp @@ -2254,6 +2254,13 @@ common_params_context common_params_parser_init(common_params & params, llama_ex params.vocoder.model = value; } ).set_examples({LLAMA_EXAMPLE_TTS, LLAMA_EXAMPLE_SERVER})); + add_opt(common_arg( + {"--tts-use-guide-tokens"}, + "Use guide tokens to improve TTS word recall", + [](common_params & params) { + params.vocoder.use_guide_tokens = true; + } + ).set_examples({LLAMA_EXAMPLE_TTS, LLAMA_EXAMPLE_SERVER})); // model-specific add_opt(common_arg( diff --git a/common/common.h b/common/common.h index 691141d6b..3bcc637cc 100644 --- a/common/common.h +++ b/common/common.h @@ -184,6 +184,8 @@ struct common_params_vocoder { std::string model = ""; // model path // NOLINT std::string model_url = ""; // model url to download // NOLINT + + bool use_guide_tokens = false; // enable guide tokens to improve TTS accuracy // NOLINT }; struct common_params { diff --git a/examples/tts/tts.cpp b/examples/tts/tts.cpp index 5a9161181..f78f76303 100644 --- a/examples/tts/tts.cpp +++ b/examples/tts/tts.cpp @@ -425,6 +425,33 @@ static void prompt_init(llama_tokens & prompt, const llama_vocab * vocab) { prompt_add(prompt, vocab, "<|im_start|>\n", true, true); } +static std::vector prepare_guide_tokens(const llama_vocab * vocab, const std::string & str) { + const std::string& delimiter = "<|text_sep|>"; + + std::vector result; + size_t start = 0; + size_t end = str.find(delimiter); + + //first token is 
always a newline, as it was not previously added + result.push_back(common_tokenize(vocab, "\n", false, true)[0]); + + while (end != std::string::npos) { + std::string current_word = str.substr(start, end - start); + auto tmp = common_tokenize(vocab, current_word, false, true); + result.push_back(tmp[0]); + start = end + delimiter.length(); + end = str.find(delimiter, start); + } + + // Add the last part + std::string current_word = str.substr(start); + auto tmp = common_tokenize(vocab, current_word, false, true); + if (tmp.size() > 0) { + result.push_back(tmp[0]); + } + return result; +} + int main(int argc, char ** argv) { common_params params; @@ -494,6 +521,7 @@ int main(int argc, char ** argv) { const auto t_main_start = ggml_time_us(); std::vector codes; + std::vector guide_tokens; // process prompt and generate voice codes { @@ -508,6 +536,9 @@ int main(int argc, char ** argv) { // convert the input text into the necessary format expected by OuteTTS { std::string prompt_clean = process_text(params.prompt); + if (params.vocoder.use_guide_tokens) { + guide_tokens = prepare_guide_tokens(vocab, prompt_clean); + } LOG_INF("%s: prompt: '%s'\n", __func__, prompt_clean.c_str()); @@ -717,6 +748,8 @@ lovely<|t_0.56|><|code_start|><|634|><|596|><|1766|><|1556|><|1306|><|1285|><|14 int n_past = batch.n_tokens; int n_decode = 0; + bool next_token_uses_guide_token = true; + while (n_decode <= n_predict) { // prepare the next batch common_batch_clear(batch); @@ -728,7 +761,17 @@ lovely<|t_0.56|><|code_start|><|634|><|596|><|1766|><|1556|><|1306|><|1285|><|14 continue; } - const llama_token new_token_id = common_sampler_sample(smpl[i], ctx_ttc, i_batch[i]); + llama_token new_token_id = common_sampler_sample(smpl[i], ctx_ttc, i_batch[i]); + + //guide tokens help prevent hallucinations by forcing the TTS to use the correct word + if (!guide_tokens.empty() && next_token_uses_guide_token && !llama_vocab_is_control(vocab, new_token_id) && !llama_vocab_is_eog(vocab, 
new_token_id)) { + llama_token guide_token = guide_tokens[0]; + guide_tokens.erase(guide_tokens.begin()); + new_token_id = guide_token; //ensure correct word fragment is used + } + + //this is the token id that always precedes a new word + next_token_uses_guide_token = (new_token_id == 198); common_sampler_accept(smpl[i], new_token_id, true); From f26c87417999209e7f4576b4f3ecf7a5b9c66a29 Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Sat, 18 Jan 2025 13:18:32 +0200 Subject: [PATCH 3/9] scripts : restore hf.sh (#11288) ggml-ci --- scripts/hf.sh | 112 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 112 insertions(+) create mode 100755 scripts/hf.sh diff --git a/scripts/hf.sh b/scripts/hf.sh new file mode 100755 index 000000000..b251925fa --- /dev/null +++ b/scripts/hf.sh @@ -0,0 +1,112 @@ +#!/bin/bash +# +# Shortcut for downloading HF models +# +# Usage: +# ./llama-cli -m $(./scripts/hf.sh https://huggingface.co/TheBloke/Mixtral-8x7B-v0.1-GGUF/resolve/main/mixtral-8x7b-v0.1.Q4_K_M.gguf) +# ./llama-cli -m $(./scripts/hf.sh --url https://huggingface.co/TheBloke/Mixtral-8x7B-v0.1-GGUF/blob/main/mixtral-8x7b-v0.1.Q4_K_M.gguf) +# ./llama-cli -m $(./scripts/hf.sh --repo TheBloke/Mixtral-8x7B-v0.1-GGUF --file mixtral-8x7b-v0.1.Q4_K_M.gguf) +# + +# all logs go to stderr +function log { + echo "$@" 1>&2 +} + +function usage { + log "Usage: $0 [[--url] ] [--repo ] [--file ] [--outdir [-h|--help]" + exit 1 +} + +# check for curl or wget +function has_cmd { + if ! [ -x "$(command -v $1)" ]; then + return 1 + fi +} + +if has_cmd wget; then + cmd="wget -q -c -O %s/%s %s" +elif has_cmd curl; then + cmd="curl -C - -f --output-dir %s -o %s -L %s" +else + log "[E] curl or wget not found" + exit 1 +fi + +url="" +repo="" +file="" +outdir="." 
+ +# parse args +while [[ $# -gt 0 ]]; do + case "$1" in + --url) + url="$2" + shift 2 + ;; + --repo) + repo="$2" + shift 2 + ;; + --file) + file="$2" + shift 2 + ;; + --outdir) + outdir="$2" + shift 2 + ;; + -h|--help) + usage + ;; + *) + url="$1" + shift + ;; + esac +done + +if [ -n "$repo" ] && [ -n "$file" ]; then + url="https://huggingface.co/$repo/resolve/main/$file" +fi + +if [ -z "$url" ]; then + log "[E] missing --url" + usage +fi + +# check if the URL is a HuggingFace model, and if so, try to download it +is_url=false + +if [[ ${#url} -gt 22 ]]; then + if [[ ${url:0:22} == "https://huggingface.co" ]]; then + is_url=true + fi +fi + +if [ "$is_url" = false ]; then + log "[E] invalid URL, must start with https://huggingface.co" + exit 0 +fi + +# replace "blob/main" with "resolve/main" +url=${url/blob\/main/resolve\/main} + +basename=$(basename $url) + +log "[+] attempting to download $basename" + +if [ -n "$cmd" ]; then + cmd=$(printf "$cmd" "$outdir" "$basename" "$url") + log "[+] $cmd" + if $cmd; then + echo $outdir/$basename + exit 0 + fi +fi + +log "[-] failed to download" + +exit 1 From f30f099228f774209aa3010b78dfbe5d262e69aa Mon Sep 17 00:00:00 2001 From: Xuan Son Nguyen Date: Sat, 18 Jan 2025 14:12:05 +0100 Subject: [PATCH 4/9] server : implement cancellable request (#11285) * server : implement cancellable request * fix typo * httplib 0.18.5 * fix i underflow --- examples/server/httplib.h | 1726 +++++++++++++---- examples/server/server.cpp | 73 +- examples/server/tests/unit/test_completion.py | 21 + examples/server/tests/utils.py | 7 +- 4 files changed, 1396 insertions(+), 431 deletions(-) diff --git a/examples/server/httplib.h b/examples/server/httplib.h index f360bd93e..c2f12dd2a 100644 --- a/examples/server/httplib.h +++ b/examples/server/httplib.h @@ -8,7 +8,7 @@ #ifndef CPPHTTPLIB_HTTPLIB_H #define CPPHTTPLIB_HTTPLIB_H -#define CPPHTTPLIB_VERSION "0.15.3" +#define CPPHTTPLIB_VERSION "0.18.5" /* * Configuration @@ -18,8 +18,12 @@ #define 
CPPHTTPLIB_KEEPALIVE_TIMEOUT_SECOND 5 #endif +#ifndef CPPHTTPLIB_KEEPALIVE_TIMEOUT_CHECK_INTERVAL_USECOND +#define CPPHTTPLIB_KEEPALIVE_TIMEOUT_CHECK_INTERVAL_USECOND 10000 +#endif + #ifndef CPPHTTPLIB_KEEPALIVE_MAX_COUNT -#define CPPHTTPLIB_KEEPALIVE_MAX_COUNT 5 +#define CPPHTTPLIB_KEEPALIVE_MAX_COUNT 100 #endif #ifndef CPPHTTPLIB_CONNECTION_TIMEOUT_SECOND @@ -30,20 +34,36 @@ #define CPPHTTPLIB_CONNECTION_TIMEOUT_USECOND 0 #endif -#ifndef CPPHTTPLIB_READ_TIMEOUT_SECOND -#define CPPHTTPLIB_READ_TIMEOUT_SECOND 5 +#ifndef CPPHTTPLIB_SERVER_READ_TIMEOUT_SECOND +#define CPPHTTPLIB_SERVER_READ_TIMEOUT_SECOND 5 #endif -#ifndef CPPHTTPLIB_READ_TIMEOUT_USECOND -#define CPPHTTPLIB_READ_TIMEOUT_USECOND 0 +#ifndef CPPHTTPLIB_SERVER_READ_TIMEOUT_USECOND +#define CPPHTTPLIB_SERVER_READ_TIMEOUT_USECOND 0 #endif -#ifndef CPPHTTPLIB_WRITE_TIMEOUT_SECOND -#define CPPHTTPLIB_WRITE_TIMEOUT_SECOND 5 +#ifndef CPPHTTPLIB_SERVER_WRITE_TIMEOUT_SECOND +#define CPPHTTPLIB_SERVER_WRITE_TIMEOUT_SECOND 5 #endif -#ifndef CPPHTTPLIB_WRITE_TIMEOUT_USECOND -#define CPPHTTPLIB_WRITE_TIMEOUT_USECOND 0 +#ifndef CPPHTTPLIB_SERVER_WRITE_TIMEOUT_USECOND +#define CPPHTTPLIB_SERVER_WRITE_TIMEOUT_USECOND 0 +#endif + +#ifndef CPPHTTPLIB_CLIENT_READ_TIMEOUT_SECOND +#define CPPHTTPLIB_CLIENT_READ_TIMEOUT_SECOND 300 +#endif + +#ifndef CPPHTTPLIB_CLIENT_READ_TIMEOUT_USECOND +#define CPPHTTPLIB_CLIENT_READ_TIMEOUT_USECOND 0 +#endif + +#ifndef CPPHTTPLIB_CLIENT_WRITE_TIMEOUT_SECOND +#define CPPHTTPLIB_CLIENT_WRITE_TIMEOUT_SECOND 5 +#endif + +#ifndef CPPHTTPLIB_CLIENT_WRITE_TIMEOUT_USECOND +#define CPPHTTPLIB_CLIENT_WRITE_TIMEOUT_USECOND 0 #endif #ifndef CPPHTTPLIB_IDLE_INTERVAL_SECOND @@ -90,8 +110,12 @@ #define CPPHTTPLIB_TCP_NODELAY false #endif +#ifndef CPPHTTPLIB_IPV6_V6ONLY +#define CPPHTTPLIB_IPV6_V6ONLY false +#endif + #ifndef CPPHTTPLIB_RECV_BUFSIZ -#define CPPHTTPLIB_RECV_BUFSIZ size_t(4096u) +#define CPPHTTPLIB_RECV_BUFSIZ size_t(16384u) #endif #ifndef CPPHTTPLIB_COMPRESSION_BUFSIZ @@ -145,11 +169,11 @@ 
using ssize_t = long; #endif // _MSC_VER #ifndef S_ISREG -#define S_ISREG(m) (((m)&S_IFREG) == S_IFREG) +#define S_ISREG(m) (((m) & S_IFREG) == S_IFREG) #endif // S_ISREG #ifndef S_ISDIR -#define S_ISDIR(m) (((m)&S_IFDIR) == S_IFDIR) +#define S_ISDIR(m) (((m) & S_IFDIR) == S_IFDIR) #endif // S_ISDIR #ifndef NOMINMAX @@ -269,7 +293,12 @@ using socket_t = int; #include #include -#if OPENSSL_VERSION_NUMBER < 0x30000000L +#if defined(OPENSSL_IS_BORINGSSL) || defined(LIBRESSL_VERSION_NUMBER) +#if OPENSSL_VERSION_NUMBER < 0x1010107f +#error Please use OpenSSL or a current version of BoringSSL +#endif +#define SSL_get1_peer_certificate SSL_get_peer_certificate +#elif OPENSSL_VERSION_NUMBER < 0x30000000L #error Sorry, OpenSSL versions prior to 3.0.0 are not supported #endif @@ -312,16 +341,63 @@ make_unique(std::size_t n) { return std::unique_ptr(new RT[n]); } -struct ci { - bool operator()(const std::string &s1, const std::string &s2) const { - return std::lexicographical_compare(s1.begin(), s1.end(), s2.begin(), - s2.end(), - [](unsigned char c1, unsigned char c2) { - return ::tolower(c1) < ::tolower(c2); - }); +namespace case_ignore { + +inline unsigned char to_lower(int c) { + const static unsigned char table[256] = { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, + 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, + 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, + 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, + 60, 61, 62, 63, 64, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, + 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, + 122, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, + 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, + 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, + 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, + 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, + 
165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, + 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 224, 225, 226, + 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, + 242, 243, 244, 245, 246, 215, 248, 249, 250, 251, 252, 253, 254, 223, 224, + 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, + 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, + 255, + }; + return table[(unsigned char)(char)c]; +} + +inline bool equal(const std::string &a, const std::string &b) { + return a.size() == b.size() && + std::equal(a.begin(), a.end(), b.begin(), [](char ca, char cb) { + return to_lower(ca) == to_lower(cb); + }); +} + +struct equal_to { + bool operator()(const std::string &a, const std::string &b) const { + return equal(a, b); } }; +struct hash { + size_t operator()(const std::string &key) const { + return hash_core(key.data(), key.size(), 0); + } + + size_t hash_core(const char *s, size_t l, size_t h) const { + return (l == 0) ? h + : hash_core(s + 1, l - 1, + // Unsets the 6 high bits of h, therefore no + // overflow happens + (((std::numeric_limits::max)() >> 6) & + h * 33) ^ + static_cast(to_lower(*s))); + } +}; + +} // namespace case_ignore + // This is based on // "http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2014/n4189". 
@@ -427,7 +503,9 @@ enum StatusCode { NetworkAuthenticationRequired_511 = 511, }; -using Headers = std::multimap; +using Headers = + std::unordered_multimap; using Params = std::multimap; using Match = std::smatch; @@ -534,6 +612,7 @@ using Ranges = std::vector; struct Request { std::string method; std::string path; + Params params; Headers headers; std::string body; @@ -545,11 +624,11 @@ struct Request { // for server std::string version; std::string target; - Params params; MultipartFormDataMap files; Ranges ranges; Match matches; std::unordered_map path_params; + std::function is_connection_closed = []() { return true; }; // for client ResponseHandler response_handler; @@ -560,8 +639,10 @@ struct Request { #endif bool has_header(const std::string &key) const; - std::string get_header_value(const std::string &key, size_t id = 0) const; - uint64_t get_header_value_u64(const std::string &key, size_t id = 0) const; + std::string get_header_value(const std::string &key, const char *def = "", + size_t id = 0) const; + uint64_t get_header_value_u64(const std::string &key, uint64_t def = 0, + size_t id = 0) const; size_t get_header_value_count(const std::string &key) const; void set_header(const std::string &key, const std::string &val); @@ -592,8 +673,10 @@ struct Response { std::string location; // Redirect location bool has_header(const std::string &key) const; - std::string get_header_value(const std::string &key, size_t id = 0) const; - uint64_t get_header_value_u64(const std::string &key, size_t id = 0) const; + std::string get_header_value(const std::string &key, const char *def = "", + size_t id = 0) const; + uint64_t get_header_value_u64(const std::string &key, uint64_t def = 0, + size_t id = 0) const; size_t get_header_value_count(const std::string &key) const; void set_header(const std::string &key, const std::string &val); @@ -614,6 +697,10 @@ struct Response { const std::string &content_type, ContentProviderWithoutLength provider, 
ContentProviderResourceReleaser resource_releaser = nullptr); + void set_file_content(const std::string &path, + const std::string &content_type); + void set_file_content(const std::string &path); + Response() = default; Response(const Response &) = default; Response &operator=(const Response &) = default; @@ -631,6 +718,8 @@ struct Response { ContentProviderResourceReleaser content_provider_resource_releaser_; bool is_chunked_content_provider_ = false; bool content_provider_success_ = false; + std::string file_content_path_; + std::string file_content_content_type_; }; class Stream { @@ -646,8 +735,6 @@ public: virtual void get_local_ip_and_port(std::string &ip, int &port) const = 0; virtual socket_t socket() const = 0; - template - ssize_t write_format(const char *fmt, const Args &...args); ssize_t write(const char *ptr); ssize_t write(const std::string &s); }; @@ -719,13 +806,18 @@ private: if (pool_.shutdown_ && pool_.jobs_.empty()) { break; } - fn = std::move(pool_.jobs_.front()); + fn = pool_.jobs_.front(); pool_.jobs_.pop_front(); } assert(true == static_cast(fn)); fn(); } + +#if defined(CPPHTTPLIB_OPENSSL_SUPPORT) && !defined(OPENSSL_IS_BORINGSSL) && \ + !defined(LIBRESSL_VERSION_NUMBER) + OPENSSL_thread_stop(); +#endif } ThreadPool &pool_; @@ -787,7 +879,6 @@ public: bool match(Request &request) const override; private: - static constexpr char marker = ':'; // Treat segment separators as the end of path parameter capture // Does not need to handle query parameters as they are parsed before path // matching @@ -871,8 +962,13 @@ public: Server &set_default_file_mimetype(const std::string &mime); Server &set_file_request_handler(Handler handler); - Server &set_error_handler(HandlerWithResponse handler); - Server &set_error_handler(Handler handler); + template + Server &set_error_handler(ErrorHandlerFunc &&handler) { + return set_error_handler_core( + std::forward(handler), + std::is_convertible{}); + } + Server &set_exception_handler(ExceptionHandler 
handler); Server &set_pre_routing_handler(HandlerWithResponse handler); Server &set_post_routing_handler(Handler handler); @@ -882,6 +978,7 @@ public: Server &set_address_family(int family); Server &set_tcp_nodelay(bool on); + Server &set_ipv6_v6only(bool on); Server &set_socket_options(SocketOptions socket_options); Server &set_default_headers(Headers headers); @@ -914,21 +1011,24 @@ public: bool is_running() const; void wait_until_ready() const; void stop(); + void decommission(); std::function new_task_queue; protected: - bool process_request(Stream &strm, bool close_connection, + bool process_request(Stream &strm, const std::string &remote_addr, + int remote_port, const std::string &local_addr, + int local_port, bool close_connection, bool &connection_closed, const std::function &setup_request); std::atomic svr_sock_{INVALID_SOCKET}; size_t keep_alive_max_count_ = CPPHTTPLIB_KEEPALIVE_MAX_COUNT; time_t keep_alive_timeout_sec_ = CPPHTTPLIB_KEEPALIVE_TIMEOUT_SECOND; - time_t read_timeout_sec_ = CPPHTTPLIB_READ_TIMEOUT_SECOND; - time_t read_timeout_usec_ = CPPHTTPLIB_READ_TIMEOUT_USECOND; - time_t write_timeout_sec_ = CPPHTTPLIB_WRITE_TIMEOUT_SECOND; - time_t write_timeout_usec_ = CPPHTTPLIB_WRITE_TIMEOUT_USECOND; + time_t read_timeout_sec_ = CPPHTTPLIB_SERVER_READ_TIMEOUT_SECOND; + time_t read_timeout_usec_ = CPPHTTPLIB_SERVER_READ_TIMEOUT_USECOND; + time_t write_timeout_sec_ = CPPHTTPLIB_SERVER_WRITE_TIMEOUT_SECOND; + time_t write_timeout_usec_ = CPPHTTPLIB_SERVER_WRITE_TIMEOUT_USECOND; time_t idle_interval_sec_ = CPPHTTPLIB_IDLE_INTERVAL_SECOND; time_t idle_interval_usec_ = CPPHTTPLIB_IDLE_INTERVAL_USECOND; size_t payload_max_length_ = CPPHTTPLIB_PAYLOAD_MAX_LENGTH; @@ -943,6 +1043,9 @@ private: static std::unique_ptr make_matcher(const std::string &pattern); + Server &set_error_handler_core(HandlerWithResponse handler, std::true_type); + Server &set_error_handler_core(Handler handler, std::false_type); + socket_t create_server_socket(const std::string &host, 
int port, int socket_flags, SocketOptions socket_options) const; @@ -985,7 +1088,7 @@ private: virtual bool process_and_close_socket(socket_t sock); std::atomic is_running_{false}; - std::atomic done_{false}; + std::atomic is_decommisioned{false}; struct MountPointEntry { std::string mount_point; @@ -1018,6 +1121,7 @@ private: int address_family_ = AF_UNSPEC; bool tcp_nodelay_ = CPPHTTPLIB_TCP_NODELAY; + bool ipv6_v6only_ = CPPHTTPLIB_IPV6_V6ONLY; SocketOptions socket_options_ = default_socket_options; Headers default_headers_; @@ -1037,6 +1141,7 @@ enum class Error { SSLConnection, SSLLoadingCerts, SSLServerVerification, + SSLServerHostnameVerification, UnsupportedMultipartBoundaryChars, Compression, ConnectionTimeout, @@ -1074,9 +1179,10 @@ public: // Request Headers bool has_request_header(const std::string &key) const; std::string get_request_header_value(const std::string &key, + const char *def = "", size_t id = 0) const; uint64_t get_request_header_value_u64(const std::string &key, - size_t id = 0) const; + uint64_t def = 0, size_t id = 0) const; size_t get_request_header_value_count(const std::string &key) const; private: @@ -1140,10 +1246,18 @@ public: const std::string &content_type); Result Post(const std::string &path, const Headers &headers, const char *body, size_t content_length, const std::string &content_type); + Result Post(const std::string &path, const Headers &headers, const char *body, + size_t content_length, const std::string &content_type, + Progress progress); Result Post(const std::string &path, const std::string &body, const std::string &content_type); + Result Post(const std::string &path, const std::string &body, + const std::string &content_type, Progress progress); Result Post(const std::string &path, const Headers &headers, const std::string &body, const std::string &content_type); + Result Post(const std::string &path, const Headers &headers, + const std::string &body, const std::string &content_type, + Progress progress); Result 
Post(const std::string &path, size_t content_length, ContentProvider content_provider, const std::string &content_type); @@ -1159,6 +1273,8 @@ public: Result Post(const std::string &path, const Params ¶ms); Result Post(const std::string &path, const Headers &headers, const Params ¶ms); + Result Post(const std::string &path, const Headers &headers, + const Params ¶ms, Progress progress); Result Post(const std::string &path, const MultipartFormDataItems &items); Result Post(const std::string &path, const Headers &headers, const MultipartFormDataItems &items); @@ -1173,10 +1289,18 @@ public: const std::string &content_type); Result Put(const std::string &path, const Headers &headers, const char *body, size_t content_length, const std::string &content_type); + Result Put(const std::string &path, const Headers &headers, const char *body, + size_t content_length, const std::string &content_type, + Progress progress); Result Put(const std::string &path, const std::string &body, const std::string &content_type); + Result Put(const std::string &path, const std::string &body, + const std::string &content_type, Progress progress); Result Put(const std::string &path, const Headers &headers, const std::string &body, const std::string &content_type); + Result Put(const std::string &path, const Headers &headers, + const std::string &body, const std::string &content_type, + Progress progress); Result Put(const std::string &path, size_t content_length, ContentProvider content_provider, const std::string &content_type); Result Put(const std::string &path, @@ -1191,6 +1315,8 @@ public: Result Put(const std::string &path, const Params ¶ms); Result Put(const std::string &path, const Headers &headers, const Params ¶ms); + Result Put(const std::string &path, const Headers &headers, + const Params ¶ms, Progress progress); Result Put(const std::string &path, const MultipartFormDataItems &items); Result Put(const std::string &path, const Headers &headers, const MultipartFormDataItems 
&items); @@ -1203,13 +1329,23 @@ public: Result Patch(const std::string &path); Result Patch(const std::string &path, const char *body, size_t content_length, const std::string &content_type); + Result Patch(const std::string &path, const char *body, size_t content_length, + const std::string &content_type, Progress progress); Result Patch(const std::string &path, const Headers &headers, const char *body, size_t content_length, const std::string &content_type); + Result Patch(const std::string &path, const Headers &headers, + const char *body, size_t content_length, + const std::string &content_type, Progress progress); Result Patch(const std::string &path, const std::string &body, const std::string &content_type); + Result Patch(const std::string &path, const std::string &body, + const std::string &content_type, Progress progress); Result Patch(const std::string &path, const Headers &headers, const std::string &body, const std::string &content_type); + Result Patch(const std::string &path, const Headers &headers, + const std::string &body, const std::string &content_type, + Progress progress); Result Patch(const std::string &path, size_t content_length, ContentProvider content_provider, const std::string &content_type); @@ -1227,13 +1363,24 @@ public: Result Delete(const std::string &path, const Headers &headers); Result Delete(const std::string &path, const char *body, size_t content_length, const std::string &content_type); + Result Delete(const std::string &path, const char *body, + size_t content_length, const std::string &content_type, + Progress progress); Result Delete(const std::string &path, const Headers &headers, const char *body, size_t content_length, const std::string &content_type); + Result Delete(const std::string &path, const Headers &headers, + const char *body, size_t content_length, + const std::string &content_type, Progress progress); Result Delete(const std::string &path, const std::string &body, const std::string &content_type); + Result 
Delete(const std::string &path, const std::string &body, + const std::string &content_type, Progress progress); Result Delete(const std::string &path, const Headers &headers, const std::string &body, const std::string &content_type); + Result Delete(const std::string &path, const Headers &headers, + const std::string &body, const std::string &content_type, + Progress progress); Result Options(const std::string &path); Result Options(const std::string &path, const Headers &headers); @@ -1258,6 +1405,7 @@ public: void set_address_family(int family); void set_tcp_nodelay(bool on); + void set_ipv6_v6only(bool on); void set_socket_options(SocketOptions socket_options); void set_connection_timeout(time_t sec, time_t usec = 0); @@ -1309,6 +1457,8 @@ public: #ifdef CPPHTTPLIB_OPENSSL_SUPPORT void enable_server_certificate_verification(bool enabled); + void enable_server_hostname_verification(bool enabled); + void set_server_certificate_verifier(std::function verifier); #endif void set_logger(Logger logger); @@ -1375,10 +1525,10 @@ protected: time_t connection_timeout_sec_ = CPPHTTPLIB_CONNECTION_TIMEOUT_SECOND; time_t connection_timeout_usec_ = CPPHTTPLIB_CONNECTION_TIMEOUT_USECOND; - time_t read_timeout_sec_ = CPPHTTPLIB_READ_TIMEOUT_SECOND; - time_t read_timeout_usec_ = CPPHTTPLIB_READ_TIMEOUT_USECOND; - time_t write_timeout_sec_ = CPPHTTPLIB_WRITE_TIMEOUT_SECOND; - time_t write_timeout_usec_ = CPPHTTPLIB_WRITE_TIMEOUT_USECOND; + time_t read_timeout_sec_ = CPPHTTPLIB_CLIENT_READ_TIMEOUT_SECOND; + time_t read_timeout_usec_ = CPPHTTPLIB_CLIENT_READ_TIMEOUT_USECOND; + time_t write_timeout_sec_ = CPPHTTPLIB_CLIENT_WRITE_TIMEOUT_SECOND; + time_t write_timeout_usec_ = CPPHTTPLIB_CLIENT_WRITE_TIMEOUT_USECOND; std::string basic_auth_username_; std::string basic_auth_password_; @@ -1395,6 +1545,7 @@ protected: int address_family_ = AF_UNSPEC; bool tcp_nodelay_ = CPPHTTPLIB_TCP_NODELAY; + bool ipv6_v6only_ = CPPHTTPLIB_IPV6_V6ONLY; SocketOptions socket_options_ = nullptr; bool 
compress_ = false; @@ -1422,6 +1573,8 @@ protected: #ifdef CPPHTTPLIB_OPENSSL_SUPPORT bool server_certificate_verification_ = true; + bool server_hostname_verification_ = true; + std::function server_certificate_verifier_; #endif Logger logger_; @@ -1430,6 +1583,9 @@ private: bool send_(Request &req, Response &res, Error &error); Result send_(Request &&req); +#ifdef CPPHTTPLIB_OPENSSL_SUPPORT + bool is_ssl_peer_could_be_closed(SSL *ssl) const; +#endif socket_t create_client_socket(Error &error) const; bool read_response_line(Stream &strm, const Request &req, Response &res) const; @@ -1448,7 +1604,7 @@ private: const Headers &headers, const char *body, size_t content_length, ContentProvider content_provider, ContentProviderWithoutLength content_provider_without_length, - const std::string &content_type); + const std::string &content_type, Progress progress); ContentProviderWithoutLength get_multipart_content_provider( const std::string &boundary, const MultipartFormDataItems &items, const MultipartFormDataProviderItems &provider_items) const; @@ -1477,6 +1633,7 @@ public: const std::string &client_key_path); Client(Client &&) = default; + Client &operator=(Client &&) = default; ~Client(); @@ -1523,10 +1680,18 @@ public: const std::string &content_type); Result Post(const std::string &path, const Headers &headers, const char *body, size_t content_length, const std::string &content_type); + Result Post(const std::string &path, const Headers &headers, const char *body, + size_t content_length, const std::string &content_type, + Progress progress); Result Post(const std::string &path, const std::string &body, const std::string &content_type); + Result Post(const std::string &path, const std::string &body, + const std::string &content_type, Progress progress); Result Post(const std::string &path, const Headers &headers, const std::string &body, const std::string &content_type); + Result Post(const std::string &path, const Headers &headers, + const std::string &body, 
const std::string &content_type, + Progress progress); Result Post(const std::string &path, size_t content_length, ContentProvider content_provider, const std::string &content_type); @@ -1542,6 +1707,8 @@ public: Result Post(const std::string &path, const Params ¶ms); Result Post(const std::string &path, const Headers &headers, const Params ¶ms); + Result Post(const std::string &path, const Headers &headers, + const Params ¶ms, Progress progress); Result Post(const std::string &path, const MultipartFormDataItems &items); Result Post(const std::string &path, const Headers &headers, const MultipartFormDataItems &items); @@ -1556,10 +1723,18 @@ public: const std::string &content_type); Result Put(const std::string &path, const Headers &headers, const char *body, size_t content_length, const std::string &content_type); + Result Put(const std::string &path, const Headers &headers, const char *body, + size_t content_length, const std::string &content_type, + Progress progress); Result Put(const std::string &path, const std::string &body, const std::string &content_type); + Result Put(const std::string &path, const std::string &body, + const std::string &content_type, Progress progress); Result Put(const std::string &path, const Headers &headers, const std::string &body, const std::string &content_type); + Result Put(const std::string &path, const Headers &headers, + const std::string &body, const std::string &content_type, + Progress progress); Result Put(const std::string &path, size_t content_length, ContentProvider content_provider, const std::string &content_type); Result Put(const std::string &path, @@ -1574,6 +1749,8 @@ public: Result Put(const std::string &path, const Params ¶ms); Result Put(const std::string &path, const Headers &headers, const Params ¶ms); + Result Put(const std::string &path, const Headers &headers, + const Params ¶ms, Progress progress); Result Put(const std::string &path, const MultipartFormDataItems &items); Result Put(const std::string 
&path, const Headers &headers, const MultipartFormDataItems &items); @@ -1586,13 +1763,23 @@ public: Result Patch(const std::string &path); Result Patch(const std::string &path, const char *body, size_t content_length, const std::string &content_type); + Result Patch(const std::string &path, const char *body, size_t content_length, + const std::string &content_type, Progress progress); Result Patch(const std::string &path, const Headers &headers, const char *body, size_t content_length, const std::string &content_type); + Result Patch(const std::string &path, const Headers &headers, + const char *body, size_t content_length, + const std::string &content_type, Progress progress); Result Patch(const std::string &path, const std::string &body, const std::string &content_type); + Result Patch(const std::string &path, const std::string &body, + const std::string &content_type, Progress progress); Result Patch(const std::string &path, const Headers &headers, const std::string &body, const std::string &content_type); + Result Patch(const std::string &path, const Headers &headers, + const std::string &body, const std::string &content_type, + Progress progress); Result Patch(const std::string &path, size_t content_length, ContentProvider content_provider, const std::string &content_type); @@ -1610,13 +1797,24 @@ public: Result Delete(const std::string &path, const Headers &headers); Result Delete(const std::string &path, const char *body, size_t content_length, const std::string &content_type); + Result Delete(const std::string &path, const char *body, + size_t content_length, const std::string &content_type, + Progress progress); Result Delete(const std::string &path, const Headers &headers, const char *body, size_t content_length, const std::string &content_type); + Result Delete(const std::string &path, const Headers &headers, + const char *body, size_t content_length, + const std::string &content_type, Progress progress); Result Delete(const std::string &path, const 
std::string &body, const std::string &content_type); + Result Delete(const std::string &path, const std::string &body, + const std::string &content_type, Progress progress); Result Delete(const std::string &path, const Headers &headers, const std::string &body, const std::string &content_type); + Result Delete(const std::string &path, const Headers &headers, + const std::string &body, const std::string &content_type, + Progress progress); Result Options(const std::string &path); Result Options(const std::string &path, const Headers &headers); @@ -1685,6 +1883,8 @@ public: #ifdef CPPHTTPLIB_OPENSSL_SUPPORT void enable_server_certificate_verification(bool enabled); + void enable_server_hostname_verification(bool enabled); + void set_server_certificate_verifier(std::function verifier); #endif void set_logger(Logger logger); @@ -1730,6 +1930,9 @@ public: SSL_CTX *ssl_context() const; + void update_certs(X509 *cert, EVP_PKEY *private_key, + X509_STORE *client_ca_cert_store = nullptr); + private: bool process_and_close_socket(socket_t sock) override; @@ -1810,68 +2013,58 @@ inline void duration_to_sec_and_usec(const T &duration, U callback) { callback(static_cast(sec), static_cast(usec)); } +inline bool is_numeric(const std::string &str) { + return !str.empty() && std::all_of(str.begin(), str.end(), ::isdigit); +} + inline uint64_t get_header_value_u64(const Headers &headers, - const std::string &key, size_t id, - uint64_t def) { + const std::string &key, uint64_t def, + size_t id, bool &is_invalid_value) { + is_invalid_value = false; auto rng = headers.equal_range(key); auto it = rng.first; std::advance(it, static_cast(id)); if (it != rng.second) { - return std::strtoull(it->second.data(), nullptr, 10); + if (is_numeric(it->second)) { + return std::strtoull(it->second.data(), nullptr, 10); + } else { + is_invalid_value = true; + } } return def; } +inline uint64_t get_header_value_u64(const Headers &headers, + const std::string &key, uint64_t def, + size_t id) { + bool 
dummy = false; + return get_header_value_u64(headers, key, def, id, dummy); +} + } // namespace detail inline uint64_t Request::get_header_value_u64(const std::string &key, - size_t id) const { - return detail::get_header_value_u64(headers, key, id, 0); + uint64_t def, size_t id) const { + return detail::get_header_value_u64(headers, key, def, id); } inline uint64_t Response::get_header_value_u64(const std::string &key, - size_t id) const { - return detail::get_header_value_u64(headers, key, id, 0); -} - -template -inline ssize_t Stream::write_format(const char *fmt, const Args &...args) { - const auto bufsiz = 2048; - std::array buf{}; - - auto sn = snprintf(buf.data(), buf.size() - 1, fmt, args...); - if (sn <= 0) { return sn; } - - auto n = static_cast(sn); - - if (n >= buf.size() - 1) { - std::vector glowable_buf(buf.size()); - - while (n >= glowable_buf.size() - 1) { - glowable_buf.resize(glowable_buf.size() * 2); - n = static_cast( - snprintf(&glowable_buf[0], glowable_buf.size() - 1, fmt, args...)); - } - return write(&glowable_buf[0], n); - } else { - return write(buf.data(), n); - } + uint64_t def, size_t id) const { + return detail::get_header_value_u64(headers, key, def, id); } inline void default_socket_options(socket_t sock) { - int yes = 1; + int opt = 1; #ifdef _WIN32 setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, - reinterpret_cast(&yes), sizeof(yes)); - setsockopt(sock, SOL_SOCKET, SO_EXCLUSIVEADDRUSE, - reinterpret_cast(&yes), sizeof(yes)); + reinterpret_cast(&opt), sizeof(opt)); #else #ifdef SO_REUSEPORT setsockopt(sock, SOL_SOCKET, SO_REUSEPORT, - reinterpret_cast(&yes), sizeof(yes)); + reinterpret_cast(&opt), sizeof(opt)); #else setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, - reinterpret_cast(&yes), sizeof(yes)); + reinterpret_cast(&opt), sizeof(opt)); #endif #endif } @@ -1997,6 +2190,8 @@ inline std::string to_string(const Error error) { case Error::SSLConnection: return "SSL connection failed"; case Error::SSLLoadingCerts: return "SSL certificate 
loading failed"; case Error::SSLServerVerification: return "SSL server verification failed"; + case Error::SSLServerHostnameVerification: + return "SSL server hostname verification failed"; case Error::UnsupportedMultipartBoundaryChars: return "Unsupported HTTP multipart boundary characters"; case Error::Compression: return "Compression failed"; @@ -2016,8 +2211,9 @@ inline std::ostream &operator<<(std::ostream &os, const Error &obj) { } inline uint64_t Result::get_request_header_value_u64(const std::string &key, + uint64_t def, size_t id) const { - return detail::get_header_value_u64(request_headers_, key, id, 0); + return detail::get_header_value_u64(request_headers_, key, def, id); } template @@ -2080,6 +2276,36 @@ make_basic_authentication_header(const std::string &username, namespace detail { +#if defined(_WIN32) +inline std::wstring u8string_to_wstring(const char *s) { + std::wstring ws; + auto len = static_cast(strlen(s)); + auto wlen = ::MultiByteToWideChar(CP_UTF8, 0, s, len, nullptr, 0); + if (wlen > 0) { + ws.resize(wlen); + wlen = ::MultiByteToWideChar( + CP_UTF8, 0, s, len, + const_cast(reinterpret_cast(ws.data())), wlen); + if (wlen != static_cast(ws.size())) { ws.clear(); } + } + return ws; +} +#endif + +struct FileStat { + FileStat(const std::string &path); + bool is_file() const; + bool is_dir() const; + +private: +#if defined(_WIN32) + struct _stat st_; +#else + struct stat st_; +#endif + int ret_ = -1; +}; + std::string encode_query_param(const std::string &value); std::string decode_url(const std::string &s, bool convert_plus_to_space); @@ -2088,6 +2314,16 @@ void read_file(const std::string &path, std::string &out); std::string trim_copy(const std::string &s); +void divide( + const char *data, std::size_t size, char d, + std::function + fn); + +void divide( + const std::string &str, char d, + std::function + fn); + void split(const char *b, const char *e, char d, std::function fn); @@ -2099,18 +2335,23 @@ bool process_client_socket(socket_t 
sock, time_t read_timeout_sec, time_t write_timeout_usec, std::function callback); -socket_t create_client_socket( - const std::string &host, const std::string &ip, int port, - int address_family, bool tcp_nodelay, SocketOptions socket_options, - time_t connection_timeout_sec, time_t connection_timeout_usec, - time_t read_timeout_sec, time_t read_timeout_usec, time_t write_timeout_sec, - time_t write_timeout_usec, const std::string &intf, Error &error); +socket_t create_client_socket(const std::string &host, const std::string &ip, + int port, int address_family, bool tcp_nodelay, + bool ipv6_v6only, SocketOptions socket_options, + time_t connection_timeout_sec, + time_t connection_timeout_usec, + time_t read_timeout_sec, time_t read_timeout_usec, + time_t write_timeout_sec, + time_t write_timeout_usec, + const std::string &intf, Error &error); const char *get_header_value(const Headers &headers, const std::string &key, - size_t id = 0, const char *def = nullptr); + const char *def, size_t id); std::string params_to_query_str(const Params ¶ms); +void parse_query_text(const char *data, std::size_t size, Params ¶ms); + void parse_query_text(const std::string &s, Params ¶ms); bool parse_multipart_boundary(const std::string &content_type, @@ -2270,15 +2511,70 @@ public: private: #if defined(_WIN32) - HANDLE hFile_; - HANDLE hMapping_; + HANDLE hFile_ = NULL; + HANDLE hMapping_ = NULL; #else - int fd_; + int fd_ = -1; #endif - size_t size_; - void *addr_; + size_t size_ = 0; + void *addr_ = nullptr; + bool is_open_empty_file = false; }; +// NOTE: https://www.rfc-editor.org/rfc/rfc9110#section-5 +namespace fields { + +inline bool is_token_char(char c) { + return std::isalnum(c) || c == '!' || c == '#' || c == '$' || c == '%' || + c == '&' || c == '\'' || c == '*' || c == '+' || c == '-' || + c == '.' 
|| c == '^' || c == '_' || c == '`' || c == '|' || c == '~'; +} + +inline bool is_token(const std::string &s) { + if (s.empty()) { return false; } + for (auto c : s) { + if (!is_token_char(c)) { return false; } + } + return true; +} + +inline bool is_field_name(const std::string &s) { return is_token(s); } + +inline bool is_vchar(char c) { return c >= 33 && c <= 126; } + +inline bool is_obs_text(char c) { return 128 <= static_cast(c); } + +inline bool is_field_vchar(char c) { return is_vchar(c) || is_obs_text(c); } + +inline bool is_field_content(const std::string &s) { + if (s.empty()) { return false; } + + if (s.size() == 1) { + return is_field_vchar(s[0]); + } else if (s.size() == 2) { + return is_field_vchar(s[0]) && is_field_vchar(s[1]); + } else { + size_t i = 0; + + if (!is_field_vchar(s[i])) { return false; } + i++; + + while (i < s.size() - 1) { + auto c = s[i++]; + if (c == ' ' || c == '\t' || is_field_vchar(c)) { + } else { + return false; + } + } + + return is_field_vchar(s[i]); + } +} + +inline bool is_field_value(const std::string &s) { return is_field_content(s); } + +} // namespace fields + } // namespace detail // ---------------------------------------------------------------------------- @@ -2392,20 +2688,6 @@ inline std::string base64_encode(const std::string &in) { return out; } -inline bool is_file(const std::string &path) { -#ifdef _WIN32 - return _access_s(path.c_str(), 0) == 0; -#else - struct stat st; - return stat(path.c_str(), &st) >= 0 && S_ISREG(st.st_mode); -#endif -} - -inline bool is_dir(const std::string &path) { - struct stat st; - return stat(path.c_str(), &st) >= 0 && S_ISDIR(st.st_mode); -} - inline bool is_valid_path(const std::string &path) { size_t level = 0; size_t i = 0; @@ -2448,6 +2730,21 @@ inline bool is_valid_path(const std::string &path) { return true; } +inline FileStat::FileStat(const std::string &path) { +#if defined(_WIN32) + auto wpath = u8string_to_wstring(path.c_str()); + ret_ = _wstat(wpath.c_str(), &st_); 
+#else + ret_ = stat(path.c_str(), &st_); +#endif +} +inline bool FileStat::is_file() const { + return ret_ >= 0 && S_ISREG(st_.st_mode); +} +inline bool FileStat::is_dir() const { + return ret_ >= 0 && S_ISDIR(st_.st_mode); +} + inline std::string encode_query_param(const std::string &value) { std::ostringstream escaped; escaped.fill('0'); @@ -2579,6 +2876,27 @@ inline std::string trim_double_quotes_copy(const std::string &s) { return s; } +inline void +divide(const char *data, std::size_t size, char d, + std::function + fn) { + const auto it = std::find(data, data + size, d); + const auto found = static_cast(it != data + size); + const auto lhs_data = data; + const auto lhs_size = static_cast(it - data); + const auto rhs_data = it + found; + const auto rhs_size = size - lhs_size - found; + + fn(lhs_data, lhs_size, rhs_data, rhs_size); +} + +inline void +divide(const std::string &str, char d, + std::function + fn) { + divide(str.data(), str.size(), d, std::move(fn)); +} + inline void split(const char *b, const char *e, char d, std::function fn) { return split(b, e, d, (std::numeric_limits::max)(), std::move(fn)); @@ -2636,6 +2954,10 @@ inline bool stream_line_reader::getline() { fixed_buffer_used_size_ = 0; glowable_buffer_.clear(); +#ifndef CPPHTTPLIB_ALLOW_LF_AS_LINE_TERMINATOR + char prev_byte = 0; +#endif + for (size_t i = 0;; i++) { char byte; auto n = strm_.read(&byte, 1); @@ -2652,7 +2974,12 @@ inline bool stream_line_reader::getline() { append(byte); +#ifdef CPPHTTPLIB_ALLOW_LF_AS_LINE_TERMINATOR if (byte == '\n') { break; } +#else + if (prev_byte == '\r' && byte == '\n') { break; } + prev_byte = byte; +#endif } return true; @@ -2671,16 +2998,7 @@ inline void stream_line_reader::append(char c) { } } -inline mmap::mmap(const char *path) -#if defined(_WIN32) - : hFile_(NULL), hMapping_(NULL) -#else - : fd_(-1) -#endif - , - size_(0), addr_(nullptr) { - open(path); -} +inline mmap::mmap(const char *path) { open(path); } inline mmap::~mmap() { close(); } @@ 
-2688,29 +3006,60 @@ inline bool mmap::open(const char *path) { close(); #if defined(_WIN32) - std::wstring wpath; - for (size_t i = 0; i < strlen(path); i++) { - wpath += path[i]; - } + auto wpath = u8string_to_wstring(path); + if (wpath.empty()) { return false; } +#if _WIN32_WINNT >= _WIN32_WINNT_WIN8 hFile_ = ::CreateFile2(wpath.c_str(), GENERIC_READ, FILE_SHARE_READ, OPEN_EXISTING, NULL); +#else + hFile_ = ::CreateFileW(wpath.c_str(), GENERIC_READ, FILE_SHARE_READ, NULL, + OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL); +#endif if (hFile_ == INVALID_HANDLE_VALUE) { return false; } LARGE_INTEGER size{}; if (!::GetFileSizeEx(hFile_, &size)) { return false; } + // If the following line doesn't compile due to QuadPart, update Windows SDK. + // See: + // https://github.com/yhirose/cpp-httplib/issues/1903#issuecomment-2316520721 + if (static_cast(size.QuadPart) > + (std::numeric_limits::max)()) { + // `size_t` might be 32-bits, on 32-bits Windows. + return false; + } size_ = static_cast(size.QuadPart); +#if _WIN32_WINNT >= _WIN32_WINNT_WIN8 hMapping_ = ::CreateFileMappingFromApp(hFile_, NULL, PAGE_READONLY, size_, NULL); +#else + hMapping_ = ::CreateFileMappingW(hFile_, NULL, PAGE_READONLY, 0, 0, NULL); +#endif + + // Special treatment for an empty file... + if (hMapping_ == NULL && size_ == 0) { + close(); + is_open_empty_file = true; + return true; + } if (hMapping_ == NULL) { close(); return false; } +#if _WIN32_WINNT >= _WIN32_WINNT_WIN8 addr_ = ::MapViewOfFileFromApp(hMapping_, FILE_MAP_READ, 0, 0); +#else + addr_ = ::MapViewOfFile(hMapping_, FILE_MAP_READ, 0, 0, 0); +#endif + + if (addr_ == nullptr) { + close(); + return false; + } #else fd_ = ::open(path, O_RDONLY); if (fd_ == -1) { return false; } @@ -2723,22 +3072,26 @@ inline bool mmap::open(const char *path) { size_ = static_cast(sb.st_size); addr_ = ::mmap(NULL, size_, PROT_READ, MAP_PRIVATE, fd_, 0); -#endif - if (addr_ == nullptr) { + // Special treatment for an empty file... 
+ if (addr_ == MAP_FAILED && size_ == 0) { close(); + is_open_empty_file = true; return false; } +#endif return true; } -inline bool mmap::is_open() const { return addr_ != nullptr; } +inline bool mmap::is_open() const { + return is_open_empty_file ? true : addr_ != nullptr; +} inline size_t mmap::size() const { return size_; } inline const char *mmap::data() const { - return static_cast(addr_); + return is_open_empty_file ? "" : static_cast(addr_); } inline void mmap::close() { @@ -2757,6 +3110,8 @@ inline void mmap::close() { ::CloseHandle(hFile_); hFile_ = INVALID_HANDLE_VALUE; } + + is_open_empty_file = false; #else if (addr_ != nullptr) { munmap(addr_, size_); @@ -2782,7 +3137,10 @@ template inline ssize_t handle_EINTR(T fn) { ssize_t res = 0; while (true) { res = fn(); - if (res < 0 && errno == EINTR) { continue; } + if (res < 0 && errno == EINTR) { + std::this_thread::sleep_for(std::chrono::microseconds{1}); + continue; + } break; } return res; @@ -2991,23 +3349,37 @@ private: }; #endif -inline bool keep_alive(socket_t sock, time_t keep_alive_timeout_sec) { +inline bool keep_alive(const std::atomic &svr_sock, socket_t sock, + time_t keep_alive_timeout_sec) { using namespace std::chrono; - auto start = steady_clock::now(); + + const auto interval_usec = + CPPHTTPLIB_KEEPALIVE_TIMEOUT_CHECK_INTERVAL_USECOND; + + // Avoid expensive `steady_clock::now()` call for the first time + if (select_read(sock, 0, interval_usec) > 0) { return true; } + + const auto start = steady_clock::now() - microseconds{interval_usec}; + const auto timeout = seconds{keep_alive_timeout_sec}; + while (true) { - auto val = select_read(sock, 0, 10000); + if (svr_sock == INVALID_SOCKET) { + break; // Server socket is closed + } + + auto val = select_read(sock, 0, interval_usec); if (val < 0) { - return false; + break; // Ssocket error } else if (val == 0) { - auto current = steady_clock::now(); - auto duration = duration_cast(current - start); - auto timeout = keep_alive_timeout_sec * 
1000; - if (duration.count() > timeout) { return false; } - std::this_thread::sleep_for(std::chrono::milliseconds(1)); + if (steady_clock::now() - start > timeout) { + break; // Timeout + } } else { - return true; + return true; // Ready for read } } + + return false; } template @@ -3018,8 +3390,7 @@ process_server_socket_core(const std::atomic &svr_sock, socket_t sock, assert(keep_alive_max_count > 0); auto ret = false; auto count = keep_alive_max_count; - while (svr_sock != INVALID_SOCKET && count > 0 && - keep_alive(sock, keep_alive_timeout_sec)) { + while (count > 0 && keep_alive(svr_sock, sock, keep_alive_timeout_sec)) { auto close_connection = count == 1; auto connection_closed = false; ret = callback(close_connection, connection_closed); @@ -3063,10 +3434,29 @@ inline int shutdown_socket(socket_t sock) { #endif } +inline std::string escape_abstract_namespace_unix_domain(const std::string &s) { + if (s.size() > 1 && s[0] == '\0') { + auto ret = s; + ret[0] = '@'; + return ret; + } + return s; +} + +inline std::string +unescape_abstract_namespace_unix_domain(const std::string &s) { + if (s.size() > 1 && s[0] == '@') { + auto ret = s; + ret[0] = '\0'; + return ret; + } + return s; +} + template socket_t create_socket(const std::string &host, const std::string &ip, int port, int address_family, int socket_flags, bool tcp_nodelay, - SocketOptions socket_options, + bool ipv6_v6only, SocketOptions socket_options, BindOrConnect bind_or_connect) { // Get address info const char *node = nullptr; @@ -3075,7 +3465,7 @@ socket_t create_socket(const std::string &host, const std::string &ip, int port, memset(&hints, 0, sizeof(struct addrinfo)); hints.ai_socktype = SOCK_STREAM; - hints.ai_protocol = 0; + hints.ai_protocol = IPPROTO_IP; if (!ip.empty()) { node = ip.c_str(); @@ -3093,20 +3483,32 @@ socket_t create_socket(const std::string &host, const std::string &ip, int port, const auto addrlen = host.length(); if (addrlen > sizeof(sockaddr_un::sun_path)) { return 
INVALID_SOCKET; } +#ifdef SOCK_CLOEXEC + auto sock = socket(hints.ai_family, hints.ai_socktype | SOCK_CLOEXEC, + hints.ai_protocol); +#else auto sock = socket(hints.ai_family, hints.ai_socktype, hints.ai_protocol); +#endif + if (sock != INVALID_SOCKET) { sockaddr_un addr{}; addr.sun_family = AF_UNIX; - std::copy(host.begin(), host.end(), addr.sun_path); + + auto unescaped_host = unescape_abstract_namespace_unix_domain(host); + std::copy(unescaped_host.begin(), unescaped_host.end(), addr.sun_path); hints.ai_addr = reinterpret_cast(&addr); hints.ai_addrlen = static_cast( sizeof(addr) - sizeof(addr.sun_path) + addrlen); +#ifndef SOCK_CLOEXEC fcntl(sock, F_SETFD, FD_CLOEXEC); +#endif + if (socket_options) { socket_options(sock); } - if (!bind_or_connect(sock, hints)) { + bool dummy; + if (!bind_or_connect(sock, hints, dummy)) { close_socket(sock); sock = INVALID_SOCKET; } @@ -3123,6 +3525,7 @@ socket_t create_socket(const std::string &host, const std::string &ip, int port, #endif return INVALID_SOCKET; } + auto se = detail::scope_exit([&] { freeaddrinfo(result); }); for (auto rp = result; rp; rp = rp->ai_next) { // Create a socket @@ -3148,11 +3551,18 @@ socket_t create_socket(const std::string &host, const std::string &ip, int port, sock = socket(rp->ai_family, rp->ai_socktype, rp->ai_protocol); } #else + +#ifdef SOCK_CLOEXEC + auto sock = + socket(rp->ai_family, rp->ai_socktype | SOCK_CLOEXEC, rp->ai_protocol); +#else auto sock = socket(rp->ai_family, rp->ai_socktype, rp->ai_protocol); +#endif + #endif if (sock == INVALID_SOCKET) { continue; } -#ifndef _WIN32 +#if !defined _WIN32 && !defined SOCK_CLOEXEC if (fcntl(sock, F_SETFD, FD_CLOEXEC) == -1) { close_socket(sock); continue; @@ -3160,39 +3570,38 @@ socket_t create_socket(const std::string &host, const std::string &ip, int port, #endif if (tcp_nodelay) { - auto yes = 1; + auto opt = 1; #ifdef _WIN32 setsockopt(sock, IPPROTO_TCP, TCP_NODELAY, - reinterpret_cast(&yes), sizeof(yes)); + reinterpret_cast(&opt), 
sizeof(opt)); #else setsockopt(sock, IPPROTO_TCP, TCP_NODELAY, - reinterpret_cast(&yes), sizeof(yes)); + reinterpret_cast(&opt), sizeof(opt)); +#endif + } + + if (rp->ai_family == AF_INET6) { + auto opt = ipv6_v6only ? 1 : 0; +#ifdef _WIN32 + setsockopt(sock, IPPROTO_IPV6, IPV6_V6ONLY, + reinterpret_cast(&opt), sizeof(opt)); +#else + setsockopt(sock, IPPROTO_IPV6, IPV6_V6ONLY, + reinterpret_cast(&opt), sizeof(opt)); #endif } if (socket_options) { socket_options(sock); } - if (rp->ai_family == AF_INET6) { - auto no = 0; -#ifdef _WIN32 - setsockopt(sock, IPPROTO_IPV6, IPV6_V6ONLY, - reinterpret_cast(&no), sizeof(no)); -#else - setsockopt(sock, IPPROTO_IPV6, IPV6_V6ONLY, - reinterpret_cast(&no), sizeof(no)); -#endif - } - // bind or connect - if (bind_or_connect(sock, *rp)) { - freeaddrinfo(result); - return sock; - } + auto quit = false; + if (bind_or_connect(sock, *rp, quit)) { return sock; } close_socket(sock); + + if (quit) { break; } } - freeaddrinfo(result); return INVALID_SOCKET; } @@ -3225,6 +3634,7 @@ inline bool bind_ip_address(socket_t sock, const std::string &host) { hints.ai_protocol = 0; if (getaddrinfo(host.c_str(), "0", &hints, &result)) { return false; } + auto se = detail::scope_exit([&] { freeaddrinfo(result); }); auto ret = false; for (auto rp = result; rp; rp = rp->ai_next) { @@ -3235,7 +3645,6 @@ inline bool bind_ip_address(socket_t sock, const std::string &host) { } } - freeaddrinfo(result); return ret; } @@ -3247,6 +3656,8 @@ inline bool bind_ip_address(socket_t sock, const std::string &host) { inline std::string if2ip(int address_family, const std::string &ifn) { struct ifaddrs *ifap; getifaddrs(&ifap); + auto se = detail::scope_exit([&] { freeifaddrs(ifap); }); + std::string addr_candidate; for (auto ifa = ifap; ifa; ifa = ifa->ifa_next) { if (ifa->ifa_addr && ifn == ifa->ifa_name && @@ -3256,7 +3667,6 @@ inline std::string if2ip(int address_family, const std::string &ifn) { auto sa = reinterpret_cast(ifa->ifa_addr); char 
buf[INET_ADDRSTRLEN]; if (inet_ntop(AF_INET, &sa->sin_addr, buf, INET_ADDRSTRLEN)) { - freeifaddrs(ifap); return std::string(buf, INET_ADDRSTRLEN); } } else if (ifa->ifa_addr->sa_family == AF_INET6) { @@ -3269,7 +3679,6 @@ inline std::string if2ip(int address_family, const std::string &ifn) { if (s6_addr_head == 0xfc || s6_addr_head == 0xfd) { addr_candidate = std::string(buf, INET6_ADDRSTRLEN); } else { - freeifaddrs(ifap); return std::string(buf, INET6_ADDRSTRLEN); } } @@ -3277,20 +3686,21 @@ inline std::string if2ip(int address_family, const std::string &ifn) { } } } - freeifaddrs(ifap); return addr_candidate; } #endif inline socket_t create_client_socket( const std::string &host, const std::string &ip, int port, - int address_family, bool tcp_nodelay, SocketOptions socket_options, - time_t connection_timeout_sec, time_t connection_timeout_usec, - time_t read_timeout_sec, time_t read_timeout_usec, time_t write_timeout_sec, + int address_family, bool tcp_nodelay, bool ipv6_v6only, + SocketOptions socket_options, time_t connection_timeout_sec, + time_t connection_timeout_usec, time_t read_timeout_sec, + time_t read_timeout_usec, time_t write_timeout_sec, time_t write_timeout_usec, const std::string &intf, Error &error) { auto sock = create_socket( - host, ip, port, address_family, 0, tcp_nodelay, std::move(socket_options), - [&](socket_t sock2, struct addrinfo &ai) -> bool { + host, ip, port, address_family, 0, tcp_nodelay, ipv6_v6only, + std::move(socket_options), + [&](socket_t sock2, struct addrinfo &ai, bool &quit) -> bool { if (!intf.empty()) { #ifdef USE_IF2IP auto ip_from_if = if2ip(address_family, intf); @@ -3314,7 +3724,10 @@ inline socket_t create_client_socket( } error = wait_until_socket_is_ready(sock2, connection_timeout_sec, connection_timeout_usec); - if (error != Error::Success) { return false; } + if (error != Error::Success) { + if (error == Error::ConnectionTimeout) { quit = true; } + return false; + } } set_nonblocking(sock2, false); @@ -3439,7 
+3852,7 @@ inline unsigned int str2tag(const std::string &s) { namespace udl { -inline constexpr unsigned int operator"" _t(const char *s, size_t l) { +inline constexpr unsigned int operator""_t(const char *s, size_t l) { return str2tag_core(s, l, 0); } @@ -3524,8 +3937,9 @@ inline bool can_compress_content_type(const std::string &content_type) { case "application/protobuf"_t: case "application/xhtml+xml"_t: return true; - default: - return !content_type.rfind("text/", 0) && tag != "text/event-stream"_t; + case "text/event-stream"_t: return false; + + default: return !content_type.rfind("text/", 0); } } @@ -3762,8 +4176,8 @@ inline bool has_header(const Headers &headers, const std::string &key) { } inline const char *get_header_value(const Headers &headers, - const std::string &key, size_t id, - const char *def) { + const std::string &key, const char *def, + size_t id) { auto rng = headers.equal_range(key); auto it = rng.first; std::advance(it, static_cast(id)); @@ -3771,14 +4185,6 @@ inline const char *get_header_value(const Headers &headers, return def; } -inline bool compare_case_ignore(const std::string &a, const std::string &b) { - if (a.size() != b.size()) { return false; } - for (size_t i = 0; i < b.size(); i++) { - if (::tolower(a[i]) != ::tolower(b[i])) { return false; } - } - return true; -} - template inline bool parse_header(const char *beg, const char *end, T fn) { // Skip trailing spaces and tabs. @@ -3801,15 +4207,27 @@ inline bool parse_header(const char *beg, const char *end, T fn) { p++; } - if (p < end) { + if (p <= end) { auto key_len = key_end - beg; if (!key_len) { return false; } auto key = std::string(beg, key_end); - auto val = compare_case_ignore(key, "Location") + auto val = case_ignore::equal(key, "Location") ? 
std::string(p, end) : decode_url(std::string(p, end), false); - fn(std::move(key), std::move(val)); + + // NOTE: From RFC 9110: + // Field values containing CR, LF, or NUL characters are + // invalid and dangerous, due to the varying ways that + // implementations might parse and interpret those + // characters; a recipient of CR, LF, or NUL within a field + // value MUST either reject the message or replace each of + // those characters with SP before further processing or + // forwarding of that message. + static const std::string CR_LF_NUL("\r\n\0", 3); + if (val.find_first_of(CR_LF_NUL) != std::string::npos) { return false; } + + fn(key, val); return true; } @@ -3829,27 +4247,27 @@ inline bool read_headers(Stream &strm, Headers &headers) { if (line_reader.end_with_crlf()) { // Blank line indicates end of headers. if (line_reader.size() == 2) { break; } -#ifdef CPPHTTPLIB_ALLOW_LF_AS_LINE_TERMINATOR } else { +#ifdef CPPHTTPLIB_ALLOW_LF_AS_LINE_TERMINATOR // Blank line indicates end of headers. if (line_reader.size() == 1) { break; } line_terminator_len = 1; - } #else - } else { continue; // Skip invalid line. 
- } #endif + } if (line_reader.size() > CPPHTTPLIB_HEADER_MAX_LENGTH) { return false; } // Exclude line terminator auto end = line_reader.ptr() + line_reader.size() - line_terminator_len; - parse_header(line_reader.ptr(), end, - [&](std::string &&key, std::string &&val) { - headers.emplace(std::move(key), std::move(val)); - }); + if (!parse_header(line_reader.ptr(), end, + [&](const std::string &key, std::string &val) { + headers.emplace(key, val); + })) { + return false; + } } return true; @@ -3937,8 +4355,19 @@ inline bool read_content_chunked(Stream &strm, T &x, assert(chunk_len == 0); - // Trailer - if (!line_reader.getline()) { return false; } + // NOTE: In RFC 9112, '7.1 Chunked Transfer Coding' mentiones "The chunked + // transfer coding is complete when a chunk with a chunk-size of zero is + // received, possibly followed by a trailer section, and finally terminated by + // an empty line". https://www.rfc-editor.org/rfc/rfc9112.html#section-7.1 + // + // In '7.1.3. Decoding Chunked', however, the pseudo-code in the section + // does't care for the existence of the final CRLF. In other words, it seems + // to be ok whether the final CRLF exists or not in the chunked data. + // https://www.rfc-editor.org/rfc/rfc9112.html#section-7.1.3 + // + // According to the reference code in RFC 9112, cpp-htpplib now allows + // chuncked transfer coding data without the final CRLF. 
+ if (!line_reader.getline()) { return true; } while (strcmp(line_reader.ptr(), "\r\n") != 0) { if (line_reader.size() > CPPHTTPLIB_HEADER_MAX_LENGTH) { return false; } @@ -3948,8 +4377,8 @@ inline bool read_content_chunked(Stream &strm, T &x, auto end = line_reader.ptr() + line_reader.size() - line_terminator_len; parse_header(line_reader.ptr(), end, - [&](std::string &&key, std::string &&val) { - x.headers.emplace(std::move(key), std::move(val)); + [&](const std::string &key, const std::string &val) { + x.headers.emplace(key, val); }); if (!line_reader.getline()) { return false; } @@ -3959,8 +4388,8 @@ inline bool read_content_chunked(Stream &strm, T &x, } inline bool is_chunked_transfer_encoding(const Headers &headers) { - return compare_case_ignore( - get_header_value(headers, "Transfer-Encoding", 0, ""), "chunked"); + return case_ignore::equal( + get_header_value(headers, "Transfer-Encoding", "", 0), "chunked"); } template @@ -4026,8 +4455,14 @@ bool read_content(Stream &strm, T &x, size_t payload_max_length, int &status, } else if (!has_header(x.headers, "Content-Length")) { ret = read_content_without_length(strm, out); } else { - auto len = get_header_value_u64(x.headers, "Content-Length", 0, 0); - if (len > payload_max_length) { + auto is_invalid_value = false; + auto len = get_header_value_u64(x.headers, "Content-Length", + std::numeric_limits::max(), + 0, is_invalid_value); + + if (is_invalid_value) { + ret = false; + } else if (len > payload_max_length) { exceed_payload_max_length = true; skip_content_with_length(strm, len); ret = false; @@ -4042,13 +4477,36 @@ bool read_content(Stream &strm, T &x, size_t payload_max_length, int &status, } return ret; }); -} // namespace detail +} + +inline ssize_t write_request_line(Stream &strm, const std::string &method, + const std::string &path) { + std::string s = method; + s += " "; + s += path; + s += " HTTP/1.1\r\n"; + return strm.write(s.data(), s.size()); +} + +inline ssize_t write_response_line(Stream &strm, 
int status) { + std::string s = "HTTP/1.1 "; + s += std::to_string(status); + s += " "; + s += httplib::status_message(status); + s += "\r\n"; + return strm.write(s.data(), s.size()); +} inline ssize_t write_headers(Stream &strm, const Headers &headers) { ssize_t write_len = 0; for (const auto &x : headers) { - auto len = - strm.write_format("%s: %s\r\n", x.first.c_str(), x.second.c_str()); + std::string s; + s = x.first; + s += ": "; + s += x.second; + s += "\r\n"; + + auto len = strm.write(s.data(), s.size()); if (len < 0) { return len; } write_len += len; } @@ -4302,22 +4760,22 @@ inline std::string params_to_query_str(const Params ¶ms) { return query; } -inline void parse_query_text(const std::string &s, Params ¶ms) { +inline void parse_query_text(const char *data, std::size_t size, + Params ¶ms) { std::set cache; - split(s.data(), s.data() + s.size(), '&', [&](const char *b, const char *e) { + split(data, data + size, '&', [&](const char *b, const char *e) { std::string kv(b, e); if (cache.find(kv) != cache.end()) { return; } - cache.insert(kv); + cache.insert(std::move(kv)); std::string key; std::string val; - split(b, e, '=', [&](const char *b2, const char *e2) { - if (key.empty()) { - key.assign(b2, e2); - } else { - val.assign(b2, e2); - } - }); + divide(b, static_cast(e - b), '=', + [&](const char *lhs_data, std::size_t lhs_size, const char *rhs_data, + std::size_t rhs_size) { + key.assign(lhs_data, lhs_size); + val.assign(rhs_data, rhs_size); + }); if (!key.empty()) { params.emplace(decode_url(key, true), decode_url(val, true)); @@ -4325,6 +4783,10 @@ inline void parse_query_text(const std::string &s, Params ¶ms) { }); } +inline void parse_query_text(const std::string &s, Params ¶ms) { + parse_query_text(s.data(), s.size(), params); +} + inline bool parse_multipart_boundary(const std::string &content_type, std::string &boundary) { auto boundary_keyword = "boundary="; @@ -4365,35 +4827,44 @@ inline bool parse_range_header(const std::string &s, Ranges 
&ranges) { #else inline bool parse_range_header(const std::string &s, Ranges &ranges) try { #endif - static auto re_first_range = std::regex(R"(bytes=(\d*-\d*(?:,\s*\d*-\d*)*))"); - std::smatch m; - if (std::regex_match(s, m, re_first_range)) { - auto pos = static_cast(m.position(1)); - auto len = static_cast(m.length(1)); + auto is_valid = [](const std::string &str) { + return std::all_of(str.cbegin(), str.cend(), + [](unsigned char c) { return std::isdigit(c); }); + }; + + if (s.size() > 7 && s.compare(0, 6, "bytes=") == 0) { + const auto pos = static_cast(6); + const auto len = static_cast(s.size() - 6); auto all_valid_ranges = true; split(&s[pos], &s[pos + len], ',', [&](const char *b, const char *e) { if (!all_valid_ranges) { return; } - static auto re_another_range = std::regex(R"(\s*(\d*)-(\d*))"); - std::cmatch cm; - if (std::regex_match(b, e, cm, re_another_range)) { - ssize_t first = -1; - if (!cm.str(1).empty()) { - first = static_cast(std::stoll(cm.str(1))); - } - ssize_t last = -1; - if (!cm.str(2).empty()) { - last = static_cast(std::stoll(cm.str(2))); - } - - if (first != -1 && last != -1 && first > last) { - all_valid_ranges = false; - return; - } - ranges.emplace_back(std::make_pair(first, last)); + const auto it = std::find(b, e, '-'); + if (it == e) { + all_valid_ranges = false; + return; } + + const auto lhs = std::string(b, it); + const auto rhs = std::string(it + 1, e); + if (!is_valid(lhs) || !is_valid(rhs)) { + all_valid_ranges = false; + return; + } + + const auto first = + static_cast(lhs.empty() ? -1 : std::stoll(lhs)); + const auto last = + static_cast(rhs.empty() ? 
-1 : std::stoll(rhs)); + if ((first == -1 && last == -1) || + (first != -1 && last != -1 && first > last)) { + all_valid_ranges = false; + return; + } + + ranges.emplace_back(first, last); }); - return all_valid_ranges; + return all_valid_ranges && !ranges.empty(); } return false; #ifdef CPPHTTPLIB_NO_EXCEPTIONS @@ -4452,7 +4923,7 @@ public: const auto header = buf_head(pos); if (!parse_header(header.data(), header.data() + header.size(), - [&](std::string &&, std::string &&) {})) { + [&](const std::string &, const std::string &) {})) { is_valid_ = false; return false; } @@ -4562,7 +5033,9 @@ private: const std::string &b) const { if (a.size() < b.size()) { return false; } for (size_t i = 0; i < b.size(); i++) { - if (::tolower(a[i]) != ::tolower(b[i])) { return false; } + if (case_ignore::to_lower(a[i]) != case_ignore::to_lower(b[i])) { + return false; + } } return true; } @@ -4645,16 +5118,6 @@ private: size_t buf_epos_ = 0; }; -inline std::string to_lower(const char *beg, const char *end) { - std::string out; - auto it = beg; - while (it != end) { - out += static_cast(::tolower(*it)); - it++; - } - return out; -} - inline std::string random_string(size_t length) { static const char data[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"; @@ -4768,7 +5231,18 @@ inline bool range_error(Request &req, Response &res) { last_pos = contant_len - 1; } - if (last_pos == -1) { last_pos = contant_len - 1; } + // NOTE: RFC-9110 '14.1.2. Byte Ranges': + // A client can limit the number of bytes requested without knowing the + // size of the selected representation. If the last-pos value is absent, + // or if the value is greater than or equal to the current length of the + // representation data, the byte range is interpreted as the remainder of + // the representation (i.e., the server replaces the value of last-pos + // with a value that is one less than the current length of the selected + // representation). 
+ // https://www.rfc-editor.org/rfc/rfc9110.html#section-14.1.2-6 + if (last_pos == -1 || last_pos >= contant_len) { + last_pos = contant_len - 1; + } // Range must be within content length if (!(0 <= first_pos && first_pos <= last_pos && @@ -4795,12 +5269,11 @@ inline bool range_error(Request &req, Response &res) { inline std::pair get_range_offset_and_length(Range r, size_t content_length) { - (void)(content_length); // patch to get rid of "unused parameter" on release build assert(r.first != -1 && r.second != -1); assert(0 <= r.first && r.first < static_cast(content_length)); assert(r.first <= r.second && r.second < static_cast(content_length)); - + (void)(content_length); return std::make_pair(r.first, static_cast(r.second - r.first) + 1); } @@ -5230,6 +5703,7 @@ inline void hosted_at(const std::string &hostname, #endif return; } + auto se = detail::scope_exit([&] { freeaddrinfo(result); }); for (auto rp = result; rp; rp = rp->ai_next) { const auto &addr = @@ -5241,8 +5715,6 @@ inline void hosted_at(const std::string &hostname, addrs.push_back(ip); } } - - freeaddrinfo(result); } inline std::string append_query_params(const std::string &path, @@ -5291,8 +5763,8 @@ inline bool Request::has_header(const std::string &key) const { } inline std::string Request::get_header_value(const std::string &key, - size_t id) const { - return detail::get_header_value(headers, key, id, ""); + const char *def, size_t id) const { + return detail::get_header_value(headers, key, def, id); } inline size_t Request::get_header_value_count(const std::string &key) const { @@ -5302,7 +5774,8 @@ inline size_t Request::get_header_value_count(const std::string &key) const { inline void Request::set_header(const std::string &key, const std::string &val) { - if (!detail::has_crlf(key) && !detail::has_crlf(val)) { + if (detail::fields::is_field_name(key) && + detail::fields::is_field_value(val)) { headers.emplace(key, val); } } @@ -5356,8 +5829,9 @@ inline bool Response::has_header(const 
std::string &key) const { } inline std::string Response::get_header_value(const std::string &key, + const char *def, size_t id) const { - return detail::get_header_value(headers, key, id, ""); + return detail::get_header_value(headers, key, def, id); } inline size_t Response::get_header_value_count(const std::string &key) const { @@ -5367,13 +5841,14 @@ inline size_t Response::get_header_value_count(const std::string &key) const { inline void Response::set_header(const std::string &key, const std::string &val) { - if (!detail::has_crlf(key) && !detail::has_crlf(val)) { + if (detail::fields::is_field_name(key) && + detail::fields::is_field_value(val)) { headers.emplace(key, val); } } inline void Response::set_redirect(const std::string &url, int stat) { - if (!detail::has_crlf(url)) { + if (detail::fields::is_field_value(url)) { set_header("Location", url); if (300 <= stat && stat < 400) { this->status = stat; @@ -5436,14 +5911,25 @@ inline void Response::set_chunked_content_provider( is_chunked_content_provider_ = true; } +inline void Response::set_file_content(const std::string &path, + const std::string &content_type) { + file_content_path_ = path; + file_content_content_type_ = content_type; +} + +inline void Response::set_file_content(const std::string &path) { + file_content_path_ = path; +} + // Result implementation inline bool Result::has_request_header(const std::string &key) const { return request_headers_.find(key) != request_headers_.end(); } inline std::string Result::get_request_header_value(const std::string &key, + const char *def, size_t id) const { - return detail::get_header_value(request_headers_, key, id, ""); + return detail::get_header_value(request_headers_, key, def, id); } inline size_t @@ -5584,6 +6070,8 @@ inline socket_t BufferStream::socket() const { return 0; } inline const std::string &BufferStream::get_buffer() const { return buffer; } inline PathParamsMatcher::PathParamsMatcher(const std::string &pattern) { + static constexpr char 
marker[] = "/:"; + // One past the last ending position of a path param substring std::size_t last_param_end = 0; @@ -5596,13 +6084,14 @@ inline PathParamsMatcher::PathParamsMatcher(const std::string &pattern) { #endif while (true) { - const auto marker_pos = pattern.find(marker, last_param_end); + const auto marker_pos = pattern.find( + marker, last_param_end == 0 ? last_param_end : last_param_end - 1); if (marker_pos == std::string::npos) { break; } static_fragments_.push_back( - pattern.substr(last_param_end, marker_pos - last_param_end)); + pattern.substr(last_param_end, marker_pos - last_param_end + 1)); - const auto param_name_start = marker_pos + 1; + const auto param_name_start = marker_pos + 2; auto sep_pos = pattern.find(separator, param_name_start); if (sep_pos == std::string::npos) { sep_pos = pattern.length(); } @@ -5664,7 +6153,7 @@ inline bool PathParamsMatcher::match(Request &request) const { request.path_params.emplace( param_name, request.path.substr(starting_pos, sep_pos - starting_pos)); - // Mark everythin up to '/' as matched + // Mark everything up to '/' as matched starting_pos = sep_pos + 1; } // Returns false if the path is longer than the pattern @@ -5763,7 +6252,8 @@ inline bool Server::set_base_dir(const std::string &dir, inline bool Server::set_mount_point(const std::string &mount_point, const std::string &dir, Headers headers) { - if (detail::is_dir(dir)) { + detail::FileStat stat(dir); + if (stat.is_dir()) { std::string mnt = !mount_point.empty() ? 
mount_point : "/"; if (!mnt.empty() && mnt[0] == '/') { base_dirs_.push_back({mnt, dir, std::move(headers)}); @@ -5800,12 +6290,14 @@ inline Server &Server::set_file_request_handler(Handler handler) { return *this; } -inline Server &Server::set_error_handler(HandlerWithResponse handler) { +inline Server &Server::set_error_handler_core(HandlerWithResponse handler, + std::true_type) { error_handler_ = std::move(handler); return *this; } -inline Server &Server::set_error_handler(Handler handler) { +inline Server &Server::set_error_handler_core(Handler handler, + std::false_type) { error_handler_ = [handler](const Request &req, Response &res) { handler(req, res); return HandlerResponse::Handled; @@ -5849,6 +6341,11 @@ inline Server &Server::set_tcp_nodelay(bool on) { return *this; } +inline Server &Server::set_ipv6_v6only(bool on) { + ipv6_v6only_ = on; + return *this; +} + inline Server &Server::set_socket_options(SocketOptions socket_options) { socket_options_ = std::move(socket_options); return *this; @@ -5900,27 +6397,27 @@ inline Server &Server::set_payload_max_length(size_t length) { inline bool Server::bind_to_port(const std::string &host, int port, int socket_flags) { - return bind_internal(host, port, socket_flags) >= 0; + auto ret = bind_internal(host, port, socket_flags); + if (ret == -1) { is_decommisioned = true; } + return ret >= 0; } inline int Server::bind_to_any_port(const std::string &host, int socket_flags) { - return bind_internal(host, 0, socket_flags); + auto ret = bind_internal(host, 0, socket_flags); + if (ret == -1) { is_decommisioned = true; } + return ret; } -inline bool Server::listen_after_bind() { - auto se = detail::scope_exit([&]() { done_ = true; }); - return listen_internal(); -} +inline bool Server::listen_after_bind() { return listen_internal(); } inline bool Server::listen(const std::string &host, int port, int socket_flags) { - auto se = detail::scope_exit([&]() { done_ = true; }); return bind_to_port(host, port, socket_flags) && 
listen_internal(); } inline bool Server::is_running() const { return is_running_; } inline void Server::wait_until_ready() const { - while (!is_running() && !done_) { + while (!is_running_ && !is_decommisioned) { std::this_thread::sleep_for(std::chrono::milliseconds{1}); } } @@ -5932,8 +6429,11 @@ inline void Server::stop() { detail::shutdown_socket(sock); detail::close_socket(sock); } + is_decommisioned = false; } +inline void Server::decommission() { is_decommisioned = true; } + inline bool Server::parse_request_line(const char *s, Request &req) const { auto len = strlen(s); if (len < 2 || s[len - 2] != '\r' || s[len - 1] != '\n') { return false; } @@ -5972,26 +6472,13 @@ inline bool Server::parse_request_line(const char *s, Request &req) const { } } - size_t count = 0; - - detail::split(req.target.data(), req.target.data() + req.target.size(), '?', - 2, [&](const char *b, const char *e) { - switch (count) { - case 0: - req.path = detail::decode_url(std::string(b, e), false); - break; - case 1: { - if (e - b > 0) { - detail::parse_query_text(std::string(b, e), req.params); - } - break; - } - default: break; - } - count++; - }); - - if (count > 2) { return false; } + detail::divide(req.target, '?', + [&](const char *lhs_data, std::size_t lhs_size, + const char *rhs_data, std::size_t rhs_size) { + req.path = detail::decode_url( + std::string(lhs_data, lhs_size), false); + detail::parse_query_text(rhs_data, rhs_size, req.params); + }); } return true; @@ -6030,23 +6517,24 @@ inline bool Server::write_response_core(Stream &strm, bool close_connection, if (close_connection || req.get_header_value("Connection") == "close") { res.set_header("Connection", "close"); } else { - std::stringstream ss; - ss << "timeout=" << keep_alive_timeout_sec_ - << ", max=" << keep_alive_max_count_; - res.set_header("Keep-Alive", ss.str()); + std::string s = "timeout="; + s += std::to_string(keep_alive_timeout_sec_); + s += ", max="; + s += std::to_string(keep_alive_max_count_); + 
res.set_header("Keep-Alive", s); } - if (!res.has_header("Content-Type") && - (!res.body.empty() || res.content_length_ > 0 || res.content_provider_)) { + if ((!res.body.empty() || res.content_length_ > 0 || res.content_provider_) && + !res.has_header("Content-Type")) { res.set_header("Content-Type", "text/plain"); } - if (!res.has_header("Content-Length") && res.body.empty() && - !res.content_length_ && !res.content_provider_) { + if (res.body.empty() && !res.content_length_ && !res.content_provider_ && + !res.has_header("Content-Length")) { res.set_header("Content-Length", "0"); } - if (!res.has_header("Accept-Ranges") && req.method == "HEAD") { + if (req.method == "HEAD" && !res.has_header("Accept-Ranges")) { res.set_header("Accept-Ranges", "bytes"); } @@ -6055,12 +6543,7 @@ inline bool Server::write_response_core(Stream &strm, bool close_connection, // Response line and headers { detail::BufferStream bstrm; - - if (!bstrm.write_format("HTTP/1.1 %d %s\r\n", res.status, - status_message(res.status))) { - return false; - } - + if (!detail::write_response_line(bstrm, res.status)) { return false; } if (!header_writer_(bstrm, res.headers)) { return false; } // Flush buffer @@ -6254,7 +6737,14 @@ inline bool Server::handle_file_request(const Request &req, Response &res, auto path = entry.base_dir + sub_path; if (path.back() == '/') { path += "index.html"; } - if (detail::is_file(path)) { + detail::FileStat stat(path); + + if (stat.is_dir()) { + res.set_redirect(sub_path + "/", StatusCode::MovedPermanently_301); + return true; + } + + if (stat.is_file()) { for (const auto &kv : entry.headers) { res.set_header(kv.first, kv.second); } @@ -6289,8 +6779,8 @@ Server::create_server_socket(const std::string &host, int port, SocketOptions socket_options) const { return detail::create_socket( host, std::string(), port, address_family_, socket_flags, tcp_nodelay_, - std::move(socket_options), - [](socket_t sock, struct addrinfo &ai) -> bool { + ipv6_v6only_, 
std::move(socket_options), + [](socket_t sock, struct addrinfo &ai, bool & /*quit*/) -> bool { if (::bind(sock, ai.ai_addr, static_cast(ai.ai_addrlen))) { return false; } @@ -6301,6 +6791,8 @@ Server::create_server_socket(const std::string &host, int port, inline int Server::bind_internal(const std::string &host, int port, int socket_flags) { + if (is_decommisioned) { return -1; } + if (!is_valid()) { return -1; } svr_sock_ = create_server_socket(host, port, socket_flags, socket_options_); @@ -6326,6 +6818,8 @@ inline int Server::bind_internal(const std::string &host, int port, } inline bool Server::listen_internal() { + if (is_decommisioned) { return false; } + auto ret = true; is_running_ = true; auto se = detail::scope_exit([&]() { is_running_ = false; }); @@ -6346,13 +6840,22 @@ inline bool Server::listen_internal() { #ifndef _WIN32 } #endif + +#if defined _WIN32 + // sockets conneced via WASAccept inherit flags NO_HANDLE_INHERIT, + // OVERLAPPED + socket_t sock = WSAAccept(svr_sock_, nullptr, nullptr, nullptr, 0); +#elif defined SOCK_CLOEXEC + socket_t sock = accept4(svr_sock_, nullptr, nullptr, SOCK_CLOEXEC); +#else socket_t sock = accept(svr_sock_, nullptr, nullptr); +#endif if (sock == INVALID_SOCKET) { if (errno == EMFILE) { // The per-process limit of open file descriptors has been reached. // Try to accept new connections after a short sleep. 
- std::this_thread::sleep_for(std::chrono::milliseconds(1)); + std::this_thread::sleep_for(std::chrono::microseconds{1}); continue; } else if (errno == EINTR || errno == EAGAIN) { continue; @@ -6406,6 +6909,7 @@ inline bool Server::listen_internal() { task_queue->shutdown(); } + is_decommisioned = !ret; return ret; } @@ -6503,7 +7007,7 @@ inline bool Server::dispatch_request(Request &req, Response &res, inline void Server::apply_ranges(const Request &req, Response &res, std::string &content_type, std::string &boundary) const { - if (req.ranges.size() > 1) { + if (req.ranges.size() > 1 && res.status == StatusCode::PartialContent_206) { auto it = res.headers.find("Content-Type"); if (it != res.headers.end()) { content_type = it->second; @@ -6521,7 +7025,7 @@ inline void Server::apply_ranges(const Request &req, Response &res, if (res.body.empty()) { if (res.content_length_ > 0) { size_t length = 0; - if (req.ranges.empty()) { + if (req.ranges.empty() || res.status != StatusCode::PartialContent_206) { length = res.content_length_; } else if (req.ranges.size() == 1) { auto offset_and_length = detail::get_range_offset_and_length( @@ -6550,7 +7054,7 @@ inline void Server::apply_ranges(const Request &req, Response &res, } } } else { - if (req.ranges.empty()) { + if (req.ranges.empty() || res.status != StatusCode::PartialContent_206) { ; } else if (req.ranges.size() == 1) { auto offset_and_length = @@ -6621,7 +7125,9 @@ inline bool Server::dispatch_request_for_content_reader( } inline bool -Server::process_request(Stream &strm, bool close_connection, +Server::process_request(Stream &strm, const std::string &remote_addr, + int remote_port, const std::string &local_addr, + int local_port, bool close_connection, bool &connection_closed, const std::function &setup_request) { std::array buf{}; @@ -6675,11 +7181,13 @@ Server::process_request(Stream &strm, bool close_connection, connection_closed = true; } - strm.get_remote_ip_and_port(req.remote_addr, req.remote_port); + 
req.remote_addr = remote_addr; + req.remote_port = remote_port; req.set_header("REMOTE_ADDR", req.remote_addr); req.set_header("REMOTE_PORT", std::to_string(req.remote_port)); - strm.get_local_ip_and_port(req.local_addr, req.local_port); + req.local_addr = local_addr; + req.local_port = local_port; req.set_header("LOCAL_ADDR", req.local_addr); req.set_header("LOCAL_PORT", std::to_string(req.local_port)); @@ -6701,13 +7209,20 @@ Server::process_request(Stream &strm, bool close_connection, switch (status) { case StatusCode::Continue_100: case StatusCode::ExpectationFailed_417: - strm.write_format("HTTP/1.1 %d %s\r\n\r\n", status, - status_message(status)); + detail::write_response_line(strm, status); + strm.write("\r\n"); break; - default: return write_response(strm, close_connection, req, res); + default: + connection_closed = true; + return write_response(strm, true, req, res); } } + // Setup `is_connection_closed` method + req.is_connection_closed = [&]() { + return !detail::is_socket_alive(strm.socket()); + }; + // Routing auto routed = false; #ifdef CPPHTTPLIB_NO_EXCEPTIONS @@ -6750,6 +7265,32 @@ Server::process_request(Stream &strm, bool close_connection, : StatusCode::PartialContent_206; } + // Serve file content by using a content provider + if (!res.file_content_path_.empty()) { + const auto &path = res.file_content_path_; + auto mm = std::make_shared(path.c_str()); + if (!mm->is_open()) { + res.body.clear(); + res.content_length_ = 0; + res.content_provider_ = nullptr; + res.status = StatusCode::NotFound_404; + return write_response(strm, close_connection, req, res); + } + + auto content_type = res.file_content_content_type_; + if (content_type.empty()) { + content_type = detail::find_content_type( + path, file_extension_and_mimetype_map_, default_file_mimetype_); + } + + res.set_content_provider( + mm->size(), content_type, + [mm](size_t offset, size_t length, DataSink &sink) -> bool { + sink.write(mm->data() + offset, length); + return true; + }); + } + 
if (detail::range_error(req, res)) { res.body.clear(); res.content_length_ = 0; @@ -6769,12 +7310,21 @@ Server::process_request(Stream &strm, bool close_connection, inline bool Server::is_valid() const { return true; } inline bool Server::process_and_close_socket(socket_t sock) { + std::string remote_addr; + int remote_port = 0; + detail::get_remote_ip_and_port(sock, remote_addr, remote_port); + + std::string local_addr; + int local_port = 0; + detail::get_local_ip_and_port(sock, local_addr, local_port); + auto ret = detail::process_server_socket( svr_sock_, sock, keep_alive_max_count_, keep_alive_timeout_sec_, read_timeout_sec_, read_timeout_usec_, write_timeout_sec_, write_timeout_usec_, - [this](Stream &strm, bool close_connection, bool &connection_closed) { - return process_request(strm, close_connection, connection_closed, + [&](Stream &strm, bool close_connection, bool &connection_closed) { + return process_request(strm, remote_addr, remote_port, local_addr, + local_port, close_connection, connection_closed, nullptr); }); @@ -6793,8 +7343,8 @@ inline ClientImpl::ClientImpl(const std::string &host, int port) inline ClientImpl::ClientImpl(const std::string &host, int port, const std::string &client_cert_path, const std::string &client_key_path) - : host_(host), port_(port), - host_and_port_(adjust_host_string(host) + ":" + std::to_string(port)), + : host_(detail::escape_abstract_namespace_unix_domain(host)), port_(port), + host_and_port_(adjust_host_string(host_) + ":" + std::to_string(port)), client_cert_path_(client_cert_path), client_key_path_(client_key_path) {} inline ClientImpl::~ClientImpl() { @@ -6825,6 +7375,7 @@ inline void ClientImpl::copy_settings(const ClientImpl &rhs) { url_encode_ = rhs.url_encode_; address_family_ = rhs.address_family_; tcp_nodelay_ = rhs.tcp_nodelay_; + ipv6_v6only_ = rhs.ipv6_v6only_; socket_options_ = rhs.socket_options_; compress_ = rhs.compress_; decompress_ = rhs.decompress_; @@ -6845,6 +7396,8 @@ inline void 
ClientImpl::copy_settings(const ClientImpl &rhs) { #endif #ifdef CPPHTTPLIB_OPENSSL_SUPPORT server_certificate_verification_ = rhs.server_certificate_verification_; + server_hostname_verification_ = rhs.server_hostname_verification_; + server_certificate_verifier_ = rhs.server_certificate_verifier_; #endif logger_ = rhs.logger_; } @@ -6853,9 +7406,9 @@ inline socket_t ClientImpl::create_client_socket(Error &error) const { if (!proxy_host_.empty() && proxy_port_ != -1) { return detail::create_client_socket( proxy_host_, std::string(), proxy_port_, address_family_, tcp_nodelay_, - socket_options_, connection_timeout_sec_, connection_timeout_usec_, - read_timeout_sec_, read_timeout_usec_, write_timeout_sec_, - write_timeout_usec_, interface_, error); + ipv6_v6only_, socket_options_, connection_timeout_sec_, + connection_timeout_usec_, read_timeout_sec_, read_timeout_usec_, + write_timeout_sec_, write_timeout_usec_, interface_, error); } // Check is custom IP specified for host_ @@ -6864,10 +7417,10 @@ inline socket_t ClientImpl::create_client_socket(Error &error) const { if (it != addr_map_.end()) { ip = it->second; } return detail::create_client_socket( - host_, ip, port_, address_family_, tcp_nodelay_, socket_options_, - connection_timeout_sec_, connection_timeout_usec_, read_timeout_sec_, - read_timeout_usec_, write_timeout_sec_, write_timeout_usec_, interface_, - error); + host_, ip, port_, address_family_, tcp_nodelay_, ipv6_v6only_, + socket_options_, connection_timeout_sec_, connection_timeout_usec_, + read_timeout_sec_, read_timeout_usec_, write_timeout_sec_, + write_timeout_usec_, interface_, error); } inline bool ClientImpl::create_and_connect_socket(Socket &socket, @@ -6956,6 +7509,18 @@ inline bool ClientImpl::send(Request &req, Response &res, Error &error) { return ret; } +#ifdef CPPHTTPLIB_OPENSSL_SUPPORT +inline bool ClientImpl::is_ssl_peer_could_be_closed(SSL *ssl) const { + detail::set_nonblocking(socket_.sock, true); + auto se = detail::scope_exit( + 
[&]() { detail::set_nonblocking(socket_.sock, false); }); + + char buf[1]; + return !SSL_peek(ssl, buf, 1) && + SSL_get_error(ssl, 0) == SSL_ERROR_ZERO_RETURN; +} +#endif + inline bool ClientImpl::send_(Request &req, Response &res, Error &error) { { std::lock_guard guard(socket_mutex_); @@ -6967,6 +7532,13 @@ inline bool ClientImpl::send_(Request &req, Response &res, Error &error) { auto is_alive = false; if (socket_.is_open()) { is_alive = detail::is_socket_alive(socket_.sock); + +#ifdef CPPHTTPLIB_OPENSSL_SUPPORT + if (is_alive && is_ssl()) { + if (is_ssl_peer_could_be_closed(socket_.ssl)) { is_alive = false; } + } +#endif + if (!is_alive) { // Attempt to avoid sigpipe by shutting down nongracefully if it seems // like the other side has already closed the connection Also, there @@ -7144,7 +7716,7 @@ inline bool ClientImpl::redirect(Request &req, Response &res, Error &error) { if (location.empty()) { return false; } const static std::regex re( - R"((?:(https?):)?(?://(?:\[([\d:]+)\]|([^:/?#]+))(?::(\d+))?)?([^?#]*)(\?[^#]*)?(?:#.*)?)"); + R"((?:(https?):)?(?://(?:\[([a-fA-F\d:]+)\]|([^:/?#]+))(?::(\d+))?)?([^?#]*)(\?[^#]*)?(?:#.*)?)"); std::smatch m; if (!std::regex_match(location, m, re)) { return false; } @@ -7243,12 +7815,26 @@ inline bool ClientImpl::write_request(Stream &strm, Request &req, if (!req.has_header("Accept")) { req.set_header("Accept", "*/*"); } -#ifndef CPPHTTPLIB_NO_DEFAULT_USER_AGENT - if (!req.has_header("User-Agent")) { - auto agent = std::string("cpp-httplib/") + CPPHTTPLIB_VERSION; - req.set_header("User-Agent", agent); - } + if (!req.content_receiver) { + if (!req.has_header("Accept-Encoding")) { + std::string accept_encoding; +#ifdef CPPHTTPLIB_BROTLI_SUPPORT + accept_encoding = "br"; #endif +#ifdef CPPHTTPLIB_ZLIB_SUPPORT + if (!accept_encoding.empty()) { accept_encoding += ", "; } + accept_encoding += "gzip, deflate"; +#endif + req.set_header("Accept-Encoding", accept_encoding); + } + +#ifndef CPPHTTPLIB_NO_DEFAULT_USER_AGENT + if 
(!req.has_header("User-Agent")) { + auto agent = std::string("cpp-httplib/") + CPPHTTPLIB_VERSION; + req.set_header("User-Agent", agent); + } +#endif + }; if (req.body.empty()) { if (req.content_provider_) { @@ -7308,8 +7894,14 @@ inline bool ClientImpl::write_request(Stream &strm, Request &req, { detail::BufferStream bstrm; - const auto &path = url_encode_ ? detail::encode_url(req.path) : req.path; - bstrm.write_format("%s %s HTTP/1.1\r\n", req.method.c_str(), path.c_str()); + const auto &path_with_query = + req.params.empty() ? req.path + : append_query_params(req.path, req.params); + + const auto &path = + url_encode_ ? detail::encode_url(path_with_query) : path_with_query; + + detail::write_request_line(bstrm, req.method, path); header_writer_(bstrm, req.headers); @@ -7417,11 +8009,12 @@ inline Result ClientImpl::send_with_content_provider( const std::string &method, const std::string &path, const Headers &headers, const char *body, size_t content_length, ContentProvider content_provider, ContentProviderWithoutLength content_provider_without_length, - const std::string &content_type) { + const std::string &content_type, Progress progress) { Request req; req.method = method; req.headers = headers; req.path = path; + req.progress = progress; auto error = Error::Success; @@ -7448,9 +8041,7 @@ inline bool ClientImpl::process_request(Stream &strm, Request &req, if (is_ssl()) { auto is_proxy_enabled = !proxy_host_.empty() && proxy_port_ != -1; if (!is_proxy_enabled) { - char buf[1]; - if (SSL_peek(socket_.ssl, buf, 1) == 0 && - SSL_get_error(socket_.ssl, 0) == SSL_ERROR_ZERO_RETURN) { + if (is_ssl_peer_could_be_closed(socket_.ssl)) { error = Error::SSLPeerCouldBeClosed_; return false; } @@ -7468,7 +8059,9 @@ inline bool ClientImpl::process_request(Stream &strm, Request &req, // Body if ((res.status != StatusCode::NoContent_204) && req.method != "HEAD" && req.method != "CONNECT") { - auto redirect = 300 < res.status && res.status < 400 && follow_location_; + auto 
redirect = 300 < res.status && res.status < 400 && + res.status != StatusCode::NotModified_304 && + follow_location_; if (req.response_handler && !redirect) { if (!req.response_handler(res)) { @@ -7489,9 +8082,7 @@ inline bool ClientImpl::process_request(Stream &strm, Request &req, : static_cast( [&](const char *buf, size_t n, uint64_t /*off*/, uint64_t /*len*/) { - if (res.body.size() + n > res.body.max_size()) { - return false; - } + assert(res.body.size() + n <= res.body.max_size()); res.body.append(buf, n); return true; }); @@ -7503,12 +8094,25 @@ inline bool ClientImpl::process_request(Stream &strm, Request &req, return ret; }; - int dummy_status; - if (!detail::read_content(strm, res, (std::numeric_limits::max)(), - dummy_status, std::move(progress), std::move(out), - decompress_)) { - if (error != Error::Canceled) { error = Error::Read; } - return false; + if (res.has_header("Content-Length")) { + if (!req.content_receiver) { + auto len = res.get_header_value_u64("Content-Length"); + if (len > res.body.max_size()) { + error = Error::Read; + return false; + } + res.body.reserve(static_cast(len)); + } + } + + if (res.status != StatusCode::NotModified_304) { + int dummy_status; + if (!detail::read_content(strm, res, (std::numeric_limits::max)(), + dummy_status, std::move(progress), + std::move(out), decompress_)) { + if (error != Error::Canceled) { error = Error::Read; } + return false; + } } } @@ -7717,14 +8321,22 @@ inline Result ClientImpl::Post(const std::string &path, inline Result ClientImpl::Post(const std::string &path, const char *body, size_t content_length, const std::string &content_type) { - return Post(path, Headers(), body, content_length, content_type); + return Post(path, Headers(), body, content_length, content_type, nullptr); } inline Result ClientImpl::Post(const std::string &path, const Headers &headers, const char *body, size_t content_length, const std::string &content_type) { return send_with_content_provider("POST", path, headers, body, 
content_length, - nullptr, nullptr, content_type); + nullptr, nullptr, content_type, nullptr); +} + +inline Result ClientImpl::Post(const std::string &path, const Headers &headers, + const char *body, size_t content_length, + const std::string &content_type, + Progress progress) { + return send_with_content_provider("POST", path, headers, body, content_length, + nullptr, nullptr, content_type, progress); } inline Result ClientImpl::Post(const std::string &path, const std::string &body, @@ -7732,12 +8344,27 @@ inline Result ClientImpl::Post(const std::string &path, const std::string &body, return Post(path, Headers(), body, content_type); } +inline Result ClientImpl::Post(const std::string &path, const std::string &body, + const std::string &content_type, + Progress progress) { + return Post(path, Headers(), body, content_type, progress); +} + inline Result ClientImpl::Post(const std::string &path, const Headers &headers, const std::string &body, const std::string &content_type) { return send_with_content_provider("POST", path, headers, body.data(), - body.size(), nullptr, nullptr, - content_type); + body.size(), nullptr, nullptr, content_type, + nullptr); +} + +inline Result ClientImpl::Post(const std::string &path, const Headers &headers, + const std::string &body, + const std::string &content_type, + Progress progress) { + return send_with_content_provider("POST", path, headers, body.data(), + body.size(), nullptr, nullptr, content_type, + progress); } inline Result ClientImpl::Post(const std::string &path, const Params ¶ms) { @@ -7763,14 +8390,15 @@ inline Result ClientImpl::Post(const std::string &path, const Headers &headers, const std::string &content_type) { return send_with_content_provider("POST", path, headers, nullptr, content_length, std::move(content_provider), - nullptr, content_type); + nullptr, content_type, nullptr); } inline Result ClientImpl::Post(const std::string &path, const Headers &headers, ContentProviderWithoutLength content_provider, 
const std::string &content_type) { return send_with_content_provider("POST", path, headers, nullptr, 0, nullptr, - std::move(content_provider), content_type); + std::move(content_provider), content_type, + nullptr); } inline Result ClientImpl::Post(const std::string &path, const Headers &headers, @@ -7779,6 +8407,13 @@ inline Result ClientImpl::Post(const std::string &path, const Headers &headers, return Post(path, headers, query, "application/x-www-form-urlencoded"); } +inline Result ClientImpl::Post(const std::string &path, const Headers &headers, + const Params ¶ms, Progress progress) { + auto query = detail::params_to_query_str(params); + return Post(path, headers, query, "application/x-www-form-urlencoded", + progress); +} + inline Result ClientImpl::Post(const std::string &path, const MultipartFormDataItems &items) { return Post(path, Headers(), items); @@ -7816,7 +8451,7 @@ ClientImpl::Post(const std::string &path, const Headers &headers, return send_with_content_provider( "POST", path, headers, nullptr, 0, nullptr, get_multipart_content_provider(boundary, items, provider_items), - content_type); + content_type, nullptr); } inline Result ClientImpl::Put(const std::string &path) { @@ -7833,7 +8468,15 @@ inline Result ClientImpl::Put(const std::string &path, const Headers &headers, const char *body, size_t content_length, const std::string &content_type) { return send_with_content_provider("PUT", path, headers, body, content_length, - nullptr, nullptr, content_type); + nullptr, nullptr, content_type, nullptr); +} + +inline Result ClientImpl::Put(const std::string &path, const Headers &headers, + const char *body, size_t content_length, + const std::string &content_type, + Progress progress) { + return send_with_content_provider("PUT", path, headers, body, content_length, + nullptr, nullptr, content_type, progress); } inline Result ClientImpl::Put(const std::string &path, const std::string &body, @@ -7841,12 +8484,27 @@ inline Result ClientImpl::Put(const 
std::string &path, const std::string &body, return Put(path, Headers(), body, content_type); } +inline Result ClientImpl::Put(const std::string &path, const std::string &body, + const std::string &content_type, + Progress progress) { + return Put(path, Headers(), body, content_type, progress); +} + inline Result ClientImpl::Put(const std::string &path, const Headers &headers, const std::string &body, const std::string &content_type) { return send_with_content_provider("PUT", path, headers, body.data(), - body.size(), nullptr, nullptr, - content_type); + body.size(), nullptr, nullptr, content_type, + nullptr); +} + +inline Result ClientImpl::Put(const std::string &path, const Headers &headers, + const std::string &body, + const std::string &content_type, + Progress progress) { + return send_with_content_provider("PUT", path, headers, body.data(), + body.size(), nullptr, nullptr, content_type, + progress); } inline Result ClientImpl::Put(const std::string &path, size_t content_length, @@ -7868,14 +8526,15 @@ inline Result ClientImpl::Put(const std::string &path, const Headers &headers, const std::string &content_type) { return send_with_content_provider("PUT", path, headers, nullptr, content_length, std::move(content_provider), - nullptr, content_type); + nullptr, content_type, nullptr); } inline Result ClientImpl::Put(const std::string &path, const Headers &headers, ContentProviderWithoutLength content_provider, const std::string &content_type) { return send_with_content_provider("PUT", path, headers, nullptr, 0, nullptr, - std::move(content_provider), content_type); + std::move(content_provider), content_type, + nullptr); } inline Result ClientImpl::Put(const std::string &path, const Params ¶ms) { @@ -7888,6 +8547,13 @@ inline Result ClientImpl::Put(const std::string &path, const Headers &headers, return Put(path, headers, query, "application/x-www-form-urlencoded"); } +inline Result ClientImpl::Put(const std::string &path, const Headers &headers, + const Params 
¶ms, Progress progress) { + auto query = detail::params_to_query_str(params); + return Put(path, headers, query, "application/x-www-form-urlencoded", + progress); +} + inline Result ClientImpl::Put(const std::string &path, const MultipartFormDataItems &items) { return Put(path, Headers(), items); @@ -7925,7 +8591,7 @@ ClientImpl::Put(const std::string &path, const Headers &headers, return send_with_content_provider( "PUT", path, headers, nullptr, 0, nullptr, get_multipart_content_provider(boundary, items, provider_items), - content_type); + content_type, nullptr); } inline Result ClientImpl::Patch(const std::string &path) { return Patch(path, std::string(), std::string()); @@ -7937,12 +8603,26 @@ inline Result ClientImpl::Patch(const std::string &path, const char *body, return Patch(path, Headers(), body, content_length, content_type); } +inline Result ClientImpl::Patch(const std::string &path, const char *body, + size_t content_length, + const std::string &content_type, + Progress progress) { + return Patch(path, Headers(), body, content_length, content_type, progress); +} + inline Result ClientImpl::Patch(const std::string &path, const Headers &headers, const char *body, size_t content_length, const std::string &content_type) { + return Patch(path, headers, body, content_length, content_type, nullptr); +} + +inline Result ClientImpl::Patch(const std::string &path, const Headers &headers, + const char *body, size_t content_length, + const std::string &content_type, + Progress progress) { return send_with_content_provider("PATCH", path, headers, body, content_length, nullptr, nullptr, - content_type); + content_type, progress); } inline Result ClientImpl::Patch(const std::string &path, @@ -7951,12 +8631,26 @@ inline Result ClientImpl::Patch(const std::string &path, return Patch(path, Headers(), body, content_type); } +inline Result ClientImpl::Patch(const std::string &path, + const std::string &body, + const std::string &content_type, + Progress progress) { + 
return Patch(path, Headers(), body, content_type, progress); +} + inline Result ClientImpl::Patch(const std::string &path, const Headers &headers, const std::string &body, const std::string &content_type) { + return Patch(path, headers, body, content_type, nullptr); +} + +inline Result ClientImpl::Patch(const std::string &path, const Headers &headers, + const std::string &body, + const std::string &content_type, + Progress progress) { return send_with_content_provider("PATCH", path, headers, body.data(), - body.size(), nullptr, nullptr, - content_type); + body.size(), nullptr, nullptr, content_type, + progress); } inline Result ClientImpl::Patch(const std::string &path, size_t content_length, @@ -7978,14 +8672,15 @@ inline Result ClientImpl::Patch(const std::string &path, const Headers &headers, const std::string &content_type) { return send_with_content_provider("PATCH", path, headers, nullptr, content_length, std::move(content_provider), - nullptr, content_type); + nullptr, content_type, nullptr); } inline Result ClientImpl::Patch(const std::string &path, const Headers &headers, ContentProviderWithoutLength content_provider, const std::string &content_type) { return send_with_content_provider("PATCH", path, headers, nullptr, 0, nullptr, - std::move(content_provider), content_type); + std::move(content_provider), content_type, + nullptr); } inline Result ClientImpl::Delete(const std::string &path) { @@ -8003,14 +8698,30 @@ inline Result ClientImpl::Delete(const std::string &path, const char *body, return Delete(path, Headers(), body, content_length, content_type); } +inline Result ClientImpl::Delete(const std::string &path, const char *body, + size_t content_length, + const std::string &content_type, + Progress progress) { + return Delete(path, Headers(), body, content_length, content_type, progress); +} + inline Result ClientImpl::Delete(const std::string &path, const Headers &headers, const char *body, size_t content_length, const std::string &content_type) { + 
return Delete(path, headers, body, content_length, content_type, nullptr); +} + +inline Result ClientImpl::Delete(const std::string &path, + const Headers &headers, const char *body, + size_t content_length, + const std::string &content_type, + Progress progress) { Request req; req.method = "DELETE"; req.headers = headers; req.path = path; + req.progress = progress; if (!content_type.empty()) { req.set_header("Content-Type", content_type); } req.body.assign(body, content_length); @@ -8024,6 +8735,14 @@ inline Result ClientImpl::Delete(const std::string &path, return Delete(path, Headers(), body.data(), body.size(), content_type); } +inline Result ClientImpl::Delete(const std::string &path, + const std::string &body, + const std::string &content_type, + Progress progress) { + return Delete(path, Headers(), body.data(), body.size(), content_type, + progress); +} + inline Result ClientImpl::Delete(const std::string &path, const Headers &headers, const std::string &body, @@ -8031,6 +8750,15 @@ inline Result ClientImpl::Delete(const std::string &path, return Delete(path, headers, body.data(), body.size(), content_type); } +inline Result ClientImpl::Delete(const std::string &path, + const Headers &headers, + const std::string &body, + const std::string &content_type, + Progress progress) { + return Delete(path, headers, body.data(), body.size(), content_type, + progress); +} + inline Result ClientImpl::Options(const std::string &path) { return Options(path, Headers()); } @@ -8138,6 +8866,8 @@ inline void ClientImpl::set_address_family(int family) { inline void ClientImpl::set_tcp_nodelay(bool on) { tcp_nodelay_ = on; } +inline void ClientImpl::set_ipv6_v6only(bool on) { ipv6_v6only_ = on; } + inline void ClientImpl::set_socket_options(SocketOptions socket_options) { socket_options_ = std::move(socket_options); } @@ -8187,13 +8917,11 @@ inline void ClientImpl::set_ca_cert_store(X509_STORE *ca_cert_store) { inline X509_STORE *ClientImpl::create_ca_cert_store(const char 
*ca_cert, std::size_t size) const { auto mem = BIO_new_mem_buf(ca_cert, static_cast(size)); + auto se = detail::scope_exit([&] { BIO_free_all(mem); }); if (!mem) { return nullptr; } auto inf = PEM_X509_INFO_read_bio(mem, nullptr, nullptr, nullptr); - if (!inf) { - BIO_free_all(mem); - return nullptr; - } + if (!inf) { return nullptr; } auto cts = X509_STORE_new(); if (cts) { @@ -8207,13 +8935,21 @@ inline X509_STORE *ClientImpl::create_ca_cert_store(const char *ca_cert, } sk_X509_INFO_pop_free(inf, X509_INFO_free); - BIO_free_all(mem); return cts; } inline void ClientImpl::enable_server_certificate_verification(bool enabled) { server_certificate_verification_ = enabled; } + +inline void ClientImpl::enable_server_hostname_verification(bool enabled) { + server_hostname_verification_ = enabled; +} + +inline void ClientImpl::set_server_certificate_verifier( + std::function verifier) { + server_certificate_verifier_ = verifier; +} #endif inline void ClientImpl::set_logger(Logger logger) { @@ -8257,13 +8993,30 @@ inline SSL *ssl_new(socket_t sock, SSL_CTX *ctx, std::mutex &ctx_mutex, return ssl; } -inline void ssl_delete(std::mutex &ctx_mutex, SSL *ssl, +inline void ssl_delete(std::mutex &ctx_mutex, SSL *ssl, socket_t sock, bool shutdown_gracefully) { // sometimes we may want to skip this to try to avoid SIGPIPE if we know // the remote has closed the network connection // Note that it is not always possible to avoid SIGPIPE, this is merely a // best-efforts. 
- if (shutdown_gracefully) { SSL_shutdown(ssl); } + if (shutdown_gracefully) { +#ifdef _WIN32 + (void)(sock); + SSL_shutdown(ssl); +#else + timeval tv; + tv.tv_sec = 1; + tv.tv_usec = 0; + setsockopt(sock, SOL_SOCKET, SO_RCVTIMEO, + reinterpret_cast(&tv), sizeof(tv)); + + auto ret = SSL_shutdown(ssl); + while (ret == 0) { + std::this_thread::sleep_for(std::chrono::milliseconds{100}); + ret = SSL_shutdown(ssl); + } +#endif + } std::lock_guard guard(ctx_mutex); SSL_free(ssl); @@ -8366,7 +9119,7 @@ inline ssize_t SSLSocketStream::read(char *ptr, size_t size) { if (SSL_pending(ssl_) > 0) { return SSL_read(ssl_, ptr, static_cast(size)); } else if (is_readable()) { - std::this_thread::sleep_for(std::chrono::milliseconds(1)); + std::this_thread::sleep_for(std::chrono::microseconds{10}); ret = SSL_read(ssl_, ptr, static_cast(size)); if (ret >= 0) { return ret; } err = SSL_get_error(ssl_, ret); @@ -8397,7 +9150,7 @@ inline ssize_t SSLSocketStream::write(const char *ptr, size_t size) { while (--n >= 0 && err == SSL_ERROR_WANT_WRITE) { #endif if (is_writable()) { - std::this_thread::sleep_for(std::chrono::milliseconds(1)); + std::this_thread::sleep_for(std::chrono::microseconds{10}); ret = SSL_write(ssl_, ptr, static_cast(handle_size)); if (ret >= 0) { return ret; } err = SSL_get_error(ssl_, ret); @@ -8439,7 +9192,7 @@ inline SSLServer::SSLServer(const char *cert_path, const char *private_key_path, SSL_OP_NO_COMPRESSION | SSL_OP_NO_SESSION_RESUMPTION_ON_RENEGOTIATION); - SSL_CTX_set_min_proto_version(ctx_, TLS1_1_VERSION); + SSL_CTX_set_min_proto_version(ctx_, TLS1_2_VERSION); if (private_key_password != nullptr && (private_key_password[0] != '\0')) { SSL_CTX_set_default_passwd_cb_userdata( @@ -8449,7 +9202,8 @@ inline SSLServer::SSLServer(const char *cert_path, const char *private_key_path, if (SSL_CTX_use_certificate_chain_file(ctx_, cert_path) != 1 || SSL_CTX_use_PrivateKey_file(ctx_, private_key_path, SSL_FILETYPE_PEM) != - 1) { + 1 || + SSL_CTX_check_private_key(ctx_) != 
1) { SSL_CTX_free(ctx_); ctx_ = nullptr; } else if (client_ca_cert_file_path || client_ca_cert_dir_path) { @@ -8471,7 +9225,7 @@ inline SSLServer::SSLServer(X509 *cert, EVP_PKEY *private_key, SSL_OP_NO_COMPRESSION | SSL_OP_NO_SESSION_RESUMPTION_ON_RENEGOTIATION); - SSL_CTX_set_min_proto_version(ctx_, TLS1_1_VERSION); + SSL_CTX_set_min_proto_version(ctx_, TLS1_2_VERSION); if (SSL_CTX_use_certificate(ctx_, cert) != 1 || SSL_CTX_use_PrivateKey(ctx_, private_key) != 1) { @@ -8505,6 +9259,19 @@ inline bool SSLServer::is_valid() const { return ctx_; } inline SSL_CTX *SSLServer::ssl_context() const { return ctx_; } +inline void SSLServer::update_certs(X509 *cert, EVP_PKEY *private_key, + X509_STORE *client_ca_cert_store) { + + std::lock_guard guard(ctx_mutex_); + + SSL_CTX_use_certificate(ctx_, cert); + SSL_CTX_use_PrivateKey(ctx_, private_key); + + if (client_ca_cert_store != nullptr) { + SSL_CTX_set_cert_store(ctx_, client_ca_cert_store); + } +} + inline bool SSLServer::process_and_close_socket(socket_t sock) { auto ssl = detail::ssl_new( sock, ctx_, ctx_mutex_, @@ -8516,20 +9283,29 @@ inline bool SSLServer::process_and_close_socket(socket_t sock) { auto ret = false; if (ssl) { + std::string remote_addr; + int remote_port = 0; + detail::get_remote_ip_and_port(sock, remote_addr, remote_port); + + std::string local_addr; + int local_port = 0; + detail::get_local_ip_and_port(sock, local_addr, local_port); + ret = detail::process_server_socket_ssl( svr_sock_, ssl, sock, keep_alive_max_count_, keep_alive_timeout_sec_, read_timeout_sec_, read_timeout_usec_, write_timeout_sec_, write_timeout_usec_, - [this, ssl](Stream &strm, bool close_connection, - bool &connection_closed) { - return process_request(strm, close_connection, connection_closed, + [&](Stream &strm, bool close_connection, bool &connection_closed) { + return process_request(strm, remote_addr, remote_port, local_addr, + local_port, close_connection, + connection_closed, [&](Request &req) { req.ssl = ssl; }); }); // 
Shutdown gracefully if the result seemed successful, non-gracefully if // the connection appeared to be closed. const bool shutdown_gracefully = ret; - detail::ssl_delete(ctx_mutex_, ssl, shutdown_gracefully); + detail::ssl_delete(ctx_mutex_, ssl, sock, shutdown_gracefully); } detail::shutdown_socket(sock); @@ -8551,6 +9327,8 @@ inline SSLClient::SSLClient(const std::string &host, int port, : ClientImpl(host, port, client_cert_path, client_key_path) { ctx_ = SSL_CTX_new(TLS_client_method()); + SSL_CTX_set_min_proto_version(ctx_, TLS1_2_VERSION); + detail::split(&host_[0], &host_[host_.size()], '.', [&](const char *b, const char *e) { host_components_.emplace_back(b, e); @@ -8758,36 +9536,47 @@ inline bool SSLClient::initialize_ssl(Socket &socket, Error &error) { } if (server_certificate_verification_) { - verify_result_ = SSL_get_verify_result(ssl2); + if (server_certificate_verifier_) { + if (!server_certificate_verifier_(ssl2)) { + error = Error::SSLServerVerification; + return false; + } + } else { + verify_result_ = SSL_get_verify_result(ssl2); - if (verify_result_ != X509_V_OK) { - error = Error::SSLServerVerification; - return false; + if (verify_result_ != X509_V_OK) { + error = Error::SSLServerVerification; + return false; + } + + auto server_cert = SSL_get1_peer_certificate(ssl2); + auto se = detail::scope_exit([&] { X509_free(server_cert); }); + + if (server_cert == nullptr) { + error = Error::SSLServerVerification; + return false; + } + + if (server_hostname_verification_) { + if (!verify_host(server_cert)) { + error = Error::SSLServerHostnameVerification; + return false; + } + } } - - auto server_cert = SSL_get1_peer_certificate(ssl2); - - if (server_cert == nullptr) { - error = Error::SSLServerVerification; - return false; - } - - if (!verify_host(server_cert)) { - X509_free(server_cert); - error = Error::SSLServerVerification; - return false; - } - X509_free(server_cert); } return true; }, [&](SSL *ssl2) { +#if defined(OPENSSL_IS_BORINGSSL) + 
SSL_set_tlsext_host_name(ssl2, host_.c_str()); +#else // NOTE: Direct call instead of using the OpenSSL macro to suppress // -Wold-style-cast warning - // SSL_set_tlsext_host_name(ssl2, host_.c_str()); SSL_ctrl(ssl2, SSL_CTRL_SET_TLSEXT_HOSTNAME, TLSEXT_NAMETYPE_host_name, static_cast(const_cast(host_.c_str()))); +#endif return true; }); @@ -8812,7 +9601,8 @@ inline void SSLClient::shutdown_ssl_impl(Socket &socket, return; } if (socket.ssl) { - detail::ssl_delete(ctx_mutex_, socket.ssl, shutdown_gracefully); + detail::ssl_delete(ctx_mutex_, socket.ssl, socket.sock, + shutdown_gracefully); socket.ssl = nullptr; } assert(socket.ssl == nullptr); @@ -8861,8 +9651,8 @@ SSLClient::verify_host_with_subject_alt_name(X509 *server_cert) const { auto type = GEN_DNS; - struct in6_addr addr6 {}; - struct in_addr addr {}; + struct in6_addr addr6{}; + struct in_addr addr{}; size_t addr_len = 0; #ifndef __MINGW32__ @@ -8965,7 +9755,7 @@ inline Client::Client(const std::string &scheme_host_port, const std::string &client_cert_path, const std::string &client_key_path) { const static std::regex re( - R"((?:([a-z]+):\/\/)?(?:\[([\d:]+)\]|([^:/?#]+))(?::(\d+))?)"); + R"((?:([a-z]+):\/\/)?(?:\[([a-fA-F\d:]+)\]|([^:/?#]+))(?::(\d+))?)"); std::smatch m; if (std::regex_match(scheme_host_port, m, re)) { @@ -9002,10 +9792,12 @@ inline Client::Client(const std::string &scheme_host_port, client_key_path); } } else { + // NOTE: Update TEST(UniversalClientImplTest, Ipv6LiteralAddress) + // if port param below changes. 
cli_ = detail::make_unique(scheme_host_port, 80, client_cert_path, client_key_path); } -} +} // namespace detail inline Client::Client(const std::string &host, int port) : cli_(detail::make_unique(host, port)) {} @@ -9111,15 +9903,30 @@ inline Result Client::Post(const std::string &path, const Headers &headers, const std::string &content_type) { return cli_->Post(path, headers, body, content_length, content_type); } +inline Result Client::Post(const std::string &path, const Headers &headers, + const char *body, size_t content_length, + const std::string &content_type, Progress progress) { + return cli_->Post(path, headers, body, content_length, content_type, + progress); +} inline Result Client::Post(const std::string &path, const std::string &body, const std::string &content_type) { return cli_->Post(path, body, content_type); } +inline Result Client::Post(const std::string &path, const std::string &body, + const std::string &content_type, Progress progress) { + return cli_->Post(path, body, content_type, progress); +} inline Result Client::Post(const std::string &path, const Headers &headers, const std::string &body, const std::string &content_type) { return cli_->Post(path, headers, body, content_type); } +inline Result Client::Post(const std::string &path, const Headers &headers, + const std::string &body, + const std::string &content_type, Progress progress) { + return cli_->Post(path, headers, body, content_type, progress); +} inline Result Client::Post(const std::string &path, size_t content_length, ContentProvider content_provider, const std::string &content_type) { @@ -9150,6 +9957,10 @@ inline Result Client::Post(const std::string &path, const Headers &headers, const Params ¶ms) { return cli_->Post(path, headers, params); } +inline Result Client::Post(const std::string &path, const Headers &headers, + const Params ¶ms, Progress progress) { + return cli_->Post(path, headers, params, progress); +} inline Result Client::Post(const std::string &path, const 
MultipartFormDataItems &items) { return cli_->Post(path, items); @@ -9180,15 +9991,29 @@ inline Result Client::Put(const std::string &path, const Headers &headers, const std::string &content_type) { return cli_->Put(path, headers, body, content_length, content_type); } +inline Result Client::Put(const std::string &path, const Headers &headers, + const char *body, size_t content_length, + const std::string &content_type, Progress progress) { + return cli_->Put(path, headers, body, content_length, content_type, progress); +} inline Result Client::Put(const std::string &path, const std::string &body, const std::string &content_type) { return cli_->Put(path, body, content_type); } +inline Result Client::Put(const std::string &path, const std::string &body, + const std::string &content_type, Progress progress) { + return cli_->Put(path, body, content_type, progress); +} inline Result Client::Put(const std::string &path, const Headers &headers, const std::string &body, const std::string &content_type) { return cli_->Put(path, headers, body, content_type); } +inline Result Client::Put(const std::string &path, const Headers &headers, + const std::string &body, + const std::string &content_type, Progress progress) { + return cli_->Put(path, headers, body, content_type, progress); +} inline Result Client::Put(const std::string &path, size_t content_length, ContentProvider content_provider, const std::string &content_type) { @@ -9219,6 +10044,10 @@ inline Result Client::Put(const std::string &path, const Headers &headers, const Params ¶ms) { return cli_->Put(path, headers, params); } +inline Result Client::Put(const std::string &path, const Headers &headers, + const Params ¶ms, Progress progress) { + return cli_->Put(path, headers, params, progress); +} inline Result Client::Put(const std::string &path, const MultipartFormDataItems &items) { return cli_->Put(path, items); @@ -9246,20 +10075,44 @@ inline Result Client::Patch(const std::string &path, const char *body, const 
std::string &content_type) { return cli_->Patch(path, body, content_length, content_type); } +inline Result Client::Patch(const std::string &path, const char *body, + size_t content_length, + const std::string &content_type, + Progress progress) { + return cli_->Patch(path, body, content_length, content_type, progress); +} inline Result Client::Patch(const std::string &path, const Headers &headers, const char *body, size_t content_length, const std::string &content_type) { return cli_->Patch(path, headers, body, content_length, content_type); } +inline Result Client::Patch(const std::string &path, const Headers &headers, + const char *body, size_t content_length, + const std::string &content_type, + Progress progress) { + return cli_->Patch(path, headers, body, content_length, content_type, + progress); +} inline Result Client::Patch(const std::string &path, const std::string &body, const std::string &content_type) { return cli_->Patch(path, body, content_type); } +inline Result Client::Patch(const std::string &path, const std::string &body, + const std::string &content_type, + Progress progress) { + return cli_->Patch(path, body, content_type, progress); +} inline Result Client::Patch(const std::string &path, const Headers &headers, const std::string &body, const std::string &content_type) { return cli_->Patch(path, headers, body, content_type); } +inline Result Client::Patch(const std::string &path, const Headers &headers, + const std::string &body, + const std::string &content_type, + Progress progress) { + return cli_->Patch(path, headers, body, content_type, progress); +} inline Result Client::Patch(const std::string &path, size_t content_length, ContentProvider content_provider, const std::string &content_type) { @@ -9294,20 +10147,44 @@ inline Result Client::Delete(const std::string &path, const char *body, const std::string &content_type) { return cli_->Delete(path, body, content_length, content_type); } +inline Result Client::Delete(const std::string 
&path, const char *body, + size_t content_length, + const std::string &content_type, + Progress progress) { + return cli_->Delete(path, body, content_length, content_type, progress); +} inline Result Client::Delete(const std::string &path, const Headers &headers, const char *body, size_t content_length, const std::string &content_type) { return cli_->Delete(path, headers, body, content_length, content_type); } +inline Result Client::Delete(const std::string &path, const Headers &headers, + const char *body, size_t content_length, + const std::string &content_type, + Progress progress) { + return cli_->Delete(path, headers, body, content_length, content_type, + progress); +} inline Result Client::Delete(const std::string &path, const std::string &body, const std::string &content_type) { return cli_->Delete(path, body, content_type); } +inline Result Client::Delete(const std::string &path, const std::string &body, + const std::string &content_type, + Progress progress) { + return cli_->Delete(path, body, content_type, progress); +} inline Result Client::Delete(const std::string &path, const Headers &headers, const std::string &body, const std::string &content_type) { return cli_->Delete(path, headers, body, content_type); } +inline Result Client::Delete(const std::string &path, const Headers &headers, + const std::string &body, + const std::string &content_type, + Progress progress) { + return cli_->Delete(path, headers, body, content_type, progress); +} inline Result Client::Options(const std::string &path) { return cli_->Options(path); } @@ -9417,6 +10294,15 @@ inline void Client::set_proxy_digest_auth(const std::string &username, inline void Client::enable_server_certificate_verification(bool enabled) { cli_->enable_server_certificate_verification(enabled); } + +inline void Client::enable_server_hostname_verification(bool enabled) { + cli_->enable_server_hostname_verification(enabled); +} + +inline void Client::set_server_certificate_verifier( + std::function 
verifier) { + cli_->set_server_certificate_verifier(verifier); +} #endif inline void Client::set_logger(Logger logger) { diff --git a/examples/server/server.cpp b/examples/server/server.cpp index 64c0c4ef6..d1e8ee829 100644 --- a/examples/server/server.cpp +++ b/examples/server/server.cpp @@ -19,6 +19,7 @@ #include "loading.html.hpp" #include +#include #include #include #include @@ -32,6 +33,8 @@ using json = nlohmann::ordered_json; +constexpr int HTTP_POLLING_SECONDS = 1; + enum stop_type { STOP_TYPE_NONE, STOP_TYPE_EOS, @@ -1602,6 +1605,30 @@ struct server_response { // should never reach here } + // same as recv(), but have timeout in seconds + // if timeout is reached, nullptr is returned + server_task_result_ptr recv_with_timeout(const std::unordered_set & id_tasks, int timeout) { + while (true) { + std::unique_lock lock(mutex_results); + bool cr_res = condition_results.wait_for(lock, std::chrono::seconds(timeout), [&]{ + return !queue_results.empty(); + }); + if (!cr_res) { + return nullptr; + } + + for (int i = 0; i < (int) queue_results.size(); i++) { + if (id_tasks.find(queue_results[i]->id) != id_tasks.end()) { + server_task_result_ptr res = std::move(queue_results[i]); + queue_results.erase(queue_results.begin() + i); + return res; + } + } + } + + // should never reach here + } + // single-task version of recv() server_task_result_ptr recv(int id_task) { std::unordered_set id_tasks = {id_task}; @@ -2322,10 +2349,21 @@ struct server_context { void receive_multi_results( const std::unordered_set & id_tasks, const std::function&)> & result_handler, - const std::function & error_handler) { + const std::function & error_handler, + const std::function & is_connection_closed) { std::vector results(id_tasks.size()); - for (size_t i = 0; i < id_tasks.size(); i++) { - server_task_result_ptr result = queue_results.recv(id_tasks); + for (int i = 0; i < (int)id_tasks.size(); i++) { + server_task_result_ptr result = queue_results.recv_with_timeout(id_tasks, 
HTTP_POLLING_SECONDS); + + if (is_connection_closed()) { + cancel_tasks(id_tasks); + return; + } + + if (result == nullptr) { + i--; // retry + continue; + } if (result->is_error()) { error_handler(result->to_json()); @@ -2349,10 +2387,20 @@ struct server_context { void receive_cmpl_results_stream( const std::unordered_set & id_tasks, const std::function & result_handler, - const std::function & error_handler) { + const std::function & error_handler, + const std::function & is_connection_closed) { size_t n_finished = 0; while (true) { - server_task_result_ptr result = queue_results.recv(id_tasks); + server_task_result_ptr result = queue_results.recv_with_timeout(id_tasks, HTTP_POLLING_SECONDS); + + if (is_connection_closed()) { + cancel_tasks(id_tasks); + return; + } + + if (result == nullptr) { + continue; // retry + } if (result->is_error()) { error_handler(result->to_json()); @@ -3633,6 +3681,7 @@ int main(int argc, char ** argv) { const auto handle_completions_impl = [&ctx_server, &res_error, &res_ok]( server_task_type type, json & data, + std::function is_connection_closed, httplib::Response & res, oaicompat_type oaicompat) { GGML_ASSERT(type == SERVER_TASK_TYPE_COMPLETION || type == SERVER_TASK_TYPE_INFILL); @@ -3694,7 +3743,7 @@ int main(int argc, char ** argv) { } }, [&](const json & error_data) { res_error(res, error_data); - }); + }, is_connection_closed); ctx_server.queue_results.remove_waiting_task_ids(task_ids); } else { @@ -3704,6 +3753,7 @@ int main(int argc, char ** argv) { if (res_json.is_array()) { for (const auto & res : res_json) { if (!server_sent_event(sink, "data", res)) { + // sending failed (HTTP connection closed), cancel the generation return false; } } @@ -3713,6 +3763,9 @@ int main(int argc, char ** argv) { } }, [&](const json & error_data) { server_sent_event(sink, "error", error_data); + }, [&sink]() { + // note: do not use req.is_connection_closed here because req is already destroyed + return !sink.is_writable(); }); if (oaicompat 
!= OAICOMPAT_TYPE_NONE) { static const std::string ev_done = "data: [DONE]\n\n"; @@ -3735,6 +3788,7 @@ int main(int argc, char ** argv) { return handle_completions_impl( SERVER_TASK_TYPE_COMPLETION, data, + req.is_connection_closed, res, OAICOMPAT_TYPE_NONE); }; @@ -3744,6 +3798,7 @@ int main(int argc, char ** argv) { return handle_completions_impl( SERVER_TASK_TYPE_COMPLETION, data, + req.is_connection_closed, res, OAICOMPAT_TYPE_COMPLETION); }; @@ -3820,6 +3875,7 @@ int main(int argc, char ** argv) { return handle_completions_impl( SERVER_TASK_TYPE_INFILL, data, + req.is_connection_closed, res, OAICOMPAT_TYPE_NONE); // infill is not OAI compatible }; @@ -3834,6 +3890,7 @@ int main(int argc, char ** argv) { return handle_completions_impl( SERVER_TASK_TYPE_COMPLETION, data, + req.is_connection_closed, res, OAICOMPAT_TYPE_CHAT); }; @@ -3980,7 +4037,7 @@ int main(int argc, char ** argv) { }, [&](const json & error_data) { res_error(res, error_data); error = true; - }); + }, req.is_connection_closed); ctx_server.queue_results.remove_waiting_task_ids(task_ids); } @@ -4070,7 +4127,7 @@ int main(int argc, char ** argv) { }, [&](const json & error_data) { res_error(res, error_data); error = true; - }); + }, req.is_connection_closed); } if (error) { diff --git a/examples/server/tests/unit/test_completion.py b/examples/server/tests/unit/test_completion.py index e5e3b6077..c1fc12462 100644 --- a/examples/server/tests/unit/test_completion.py +++ b/examples/server/tests/unit/test_completion.py @@ -1,4 +1,5 @@ import pytest +import requests import time from openai import OpenAI from utils import * @@ -405,3 +406,23 @@ def test_n_probs_post_sampling(): assert "bytes" in prob and type(prob["bytes"]) == list # because the test model usually output token with either 100% or 0% probability, we need to check all the top_probs assert any(prob["prob"] == 1.0 for prob in tok["top_probs"]) + + +def test_cancel_request(): + global server + server.n_ctx = 4096 + server.n_predict = -1 + 
server.n_slots = 1 + server.server_slots = True + server.start() + # send a request that will take a long time, but cancel it before it finishes + try: + server.make_request("POST", "/completion", data={ + "prompt": "I believe the meaning of life is", + }, timeout=0.1) + except requests.exceptions.ReadTimeout: + pass # expected + # make sure the slot is free + time.sleep(1) # wait for HTTP_POLLING_SECONDS + res = server.make_request("GET", "/slots") + assert res.body[0]["is_processing"] == False diff --git a/examples/server/tests/utils.py b/examples/server/tests/utils.py index a1a94d0f1..73be4c92f 100644 --- a/examples/server/tests/utils.py +++ b/examples/server/tests/utils.py @@ -219,17 +219,18 @@ class ServerProcess: path: str, data: dict | Any | None = None, headers: dict | None = None, + timeout: float | None = None, ) -> ServerResponse: url = f"http://{self.server_host}:{self.server_port}{path}" parse_body = False if method == "GET": - response = requests.get(url, headers=headers) + response = requests.get(url, headers=headers, timeout=timeout) parse_body = True elif method == "POST": - response = requests.post(url, headers=headers, json=data) + response = requests.post(url, headers=headers, json=data, timeout=timeout) parse_body = True elif method == "OPTIONS": - response = requests.options(url, headers=headers) + response = requests.options(url, headers=headers, timeout=timeout) else: raise ValueError(f"Unimplemented method: {method}") result = ServerResponse() From 4dd34ff83165a483ebff7bd43621b28490fa1fd6 Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Sat, 18 Jan 2025 16:18:15 +0200 Subject: [PATCH 5/9] cmake : add sanitizer flags for llama.cpp (#11279) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * cmake : add sanitizer flags for llama.cpp ggml-ci * tests : fix compile warnings ggml-ci * cmake : move sanitizer flags to llama_add_compile_flags ggml-ci * cmake : move llama.cpp compile flags to top level 
lists ggml-ci * cmake : apply only sanitizer flags at top level ggml-ci * tests : fix gguf context use in same_tensor_data * gguf-test: tensor data comparison * dummy : trigger ggml-ci * unicode : silence gcc warnings ggml-ci * ci : use sanitizer builds only in Debug mode ggml-ci * cmake : add status messages [no ci] --------- Co-authored-by: Johannes Gäßler --- .github/workflows/build.yml | 2 +- CMakeLists.txt | 73 +++++++++++++++++++++++++------------ ggml/src/gguf.cpp | 4 ++ src/unicode.cpp | 5 +-- tests/CMakeLists.txt | 2 + tests/test-gguf.cpp | 28 ++++++++------ tests/test-sampling.cpp | 1 - 7 files changed, 74 insertions(+), 41 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index c85999b89..9e0c4a675 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -234,7 +234,7 @@ jobs: strategy: matrix: sanitizer: [ADDRESS, THREAD, UNDEFINED] - build_type: [Debug, Release] + build_type: [Debug] steps: - name: Clone diff --git a/CMakeLists.txt b/CMakeLists.txt index a717a508f..42caed486 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -83,11 +83,8 @@ include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/build-info.cmake) include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/common.cmake) # override ggml options -set(GGML_SANITIZE_THREAD ${LLAMA_SANITIZE_THREAD}) -set(GGML_SANITIZE_ADDRESS ${LLAMA_SANITIZE_ADDRESS}) -set(GGML_SANITIZE_UNDEFINED ${LLAMA_SANITIZE_UNDEFINED}) -set(GGML_ALL_WARNINGS ${LLAMA_ALL_WARNINGS}) -set(GGML_FATAL_WARNINGS ${LLAMA_FATAL_WARNINGS}) +set(GGML_ALL_WARNINGS ${LLAMA_ALL_WARNINGS}) +set(GGML_FATAL_WARNINGS ${LLAMA_FATAL_WARNINGS}) # change the default for these ggml options if (NOT DEFINED GGML_LLAMAFILE) @@ -117,16 +114,62 @@ llama_option_depr(WARNING LLAMA_SYCL GGML_SYCL) llama_option_depr(WARNING LLAMA_SYCL_F16 GGML_SYCL_F16) llama_option_depr(WARNING LLAMA_CANN GGML_CANN) +if (NOT MSVC) + if (LLAMA_SANITIZE_THREAD) + message(STATUS "Using -fsanitize=thread") + + 
add_compile_options(-fsanitize=thread) + link_libraries (-fsanitize=thread) + endif() + + if (LLAMA_SANITIZE_ADDRESS) + message(STATUS "Using -fsanitize=address") + + add_compile_options(-fsanitize=address -fno-omit-frame-pointer) + link_libraries (-fsanitize=address) + endif() + + if (LLAMA_SANITIZE_UNDEFINED) + message(STATUS "Using -fsanitize=undefined") + + add_compile_options(-fsanitize=undefined) + link_libraries (-fsanitize=undefined) + endif() +endif() + # -# build the library +# 3rd-party # if (NOT TARGET ggml) add_subdirectory(ggml) # ... otherwise assume ggml is added by a parent CMakeLists.txt endif() + +# +# build the library +# + add_subdirectory(src) +# +# utils, programs, examples and tests +# + +if (LLAMA_BUILD_COMMON) + add_subdirectory(common) +endif() + +if (LLAMA_BUILD_COMMON AND LLAMA_BUILD_TESTS AND NOT CMAKE_JS_VERSION) + include(CTest) + add_subdirectory(tests) +endif() + +if (LLAMA_BUILD_COMMON AND LLAMA_BUILD_EXAMPLES) + add_subdirectory(examples) + add_subdirectory(pocs) +endif() + # # install # @@ -200,21 +243,3 @@ configure_file(cmake/llama.pc.in install(FILES "${CMAKE_CURRENT_BINARY_DIR}/llama.pc" DESTINATION lib/pkgconfig) - -# -# utils, programs, examples and tests -# - -if (LLAMA_BUILD_COMMON) - add_subdirectory(common) -endif() - -if (LLAMA_BUILD_COMMON AND LLAMA_BUILD_TESTS AND NOT CMAKE_JS_VERSION) - include(CTest) - add_subdirectory(tests) -endif() - -if (LLAMA_BUILD_COMMON AND LLAMA_BUILD_EXAMPLES) - add_subdirectory(examples) - add_subdirectory(pocs) -endif() diff --git a/ggml/src/gguf.cpp b/ggml/src/gguf.cpp index 655ed600a..ab13669c5 100644 --- a/ggml/src/gguf.cpp +++ b/ggml/src/gguf.cpp @@ -648,6 +648,10 @@ struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_par ok = ok && data != nullptr; + if (ok) { + ggml_set_name(data, "GGUF tensor data binary blob"); + } + // read the binary blob with the tensor data ok = ok && gr.read(data->data, ctx->size); diff --git a/src/unicode.cpp b/src/unicode.cpp 
index 7aca6544b..89180da41 100644 --- a/src/unicode.cpp +++ b/src/unicode.cpp @@ -7,18 +7,17 @@ #include #include +#include #include #include +#include #include #include #include #include #include -#include #include #include -#include -#include size_t unicode_len_utf8(char src) { const size_t lookup[] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 3, 4 }; diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 2b5e5fd4a..3fa43c295 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -1,3 +1,5 @@ +llama_add_compile_flags() + function(llama_test target) include(CMakeParseArguments) set(options) diff --git a/tests/test-gguf.cpp b/tests/test-gguf.cpp index 611957ac0..6ed696328 100644 --- a/tests/test-gguf.cpp +++ b/tests/test-gguf.cpp @@ -48,7 +48,7 @@ enum handcrafted_file_type { HANDCRAFTED_DATA_CUSTOM_ALIGN = 810 + offset_has_data, }; -std::string handcrafted_file_type_name(const enum handcrafted_file_type hft) { +static std::string handcrafted_file_type_name(const enum handcrafted_file_type hft) { switch (hft) { case HANDCRAFTED_HEADER_BAD_MAGIC: return "HEADER_BAD_MAGIC"; case HANDCRAFTED_HEADER_BAD_VERSION_1: return "HEADER_BAD_VERSION_1"; @@ -99,7 +99,7 @@ static bool expect_context_not_null(const enum handcrafted_file_type hft) { typedef std::pair> tensor_config_t; -std::vector get_tensor_configs(std::mt19937 & rng) { +static std::vector get_tensor_configs(std::mt19937 & rng) { std::vector tensor_configs; tensor_configs.reserve(100); @@ -122,7 +122,7 @@ std::vector get_tensor_configs(std::mt19937 & rng) { return tensor_configs; } -std::vector> get_kv_types(std::mt19937 rng) { +static std::vector> get_kv_types(std::mt19937 rng) { std::vector> kv_types; kv_types.reserve(100); @@ -626,8 +626,6 @@ static bool handcrafted_check_tensor_data(const gguf_context * gguf_ctx, const u bool ok = true; - const uint32_t alignment = GGUF_DEFAULT_ALIGNMENT; - for (int i = 0; i < int(tensor_configs.size()); ++i) { const ggml_type type = tensor_configs[i].first; 
const std::array shape = tensor_configs[i].second; @@ -866,13 +864,13 @@ static struct random_gguf_context_result get_random_gguf_context(ggml_backend_t case GGUF_TYPE_COUNT: default: { GGML_ABORT("fatal error"); - } break; + } } } break; case GGUF_TYPE_COUNT: default: { GGML_ABORT("fatal error"); - } break; + } } } @@ -938,7 +936,7 @@ static bool all_kv_in_other(const gguf_context * ctx, const gguf_context * other } if (type == GGUF_TYPE_ARRAY) { - const int arr_n = gguf_get_arr_n(ctx, id); + const size_t arr_n = gguf_get_arr_n(ctx, id); if (arr_n != gguf_get_arr_n(other, idx_other)) { ok = false; continue; @@ -953,7 +951,7 @@ static bool all_kv_in_other(const gguf_context * ctx, const gguf_context * other if (type_arr == GGUF_TYPE_BOOL) { const int8_t * data = reinterpret_cast(gguf_get_arr_data(ctx, id)); const int8_t * data_other = reinterpret_cast(gguf_get_arr_data(other, idx_other)); - for (int arr_i = 0; arr_i < arr_n; ++arr_i) { + for (size_t arr_i = 0; arr_i < arr_n; ++arr_i) { if (bool(data[arr_i]) != bool(data_other[arr_i])) { ok = false; } @@ -962,7 +960,7 @@ static bool all_kv_in_other(const gguf_context * ctx, const gguf_context * other } if (type_arr == GGUF_TYPE_STRING) { - for (int arr_i = 0; arr_i < arr_n; ++arr_i) { + for (size_t arr_i = 0; arr_i < arr_n; ++arr_i) { const std::string str = gguf_get_arr_str(ctx, id, arr_i); const std::string str_other = gguf_get_arr_str(other, idx_other, arr_i); if (str != str_other) { @@ -1033,6 +1031,12 @@ static bool same_tensor_data(const struct ggml_context * orig, const struct ggml struct ggml_tensor * t_orig = ggml_get_first_tensor(orig); struct ggml_tensor * t_read = ggml_get_first_tensor(read); + + if (std::string(t_read->name) != "GGUF tensor data binary blob") { + return false; + } + t_read = ggml_get_next_tensor(read, t_read); + while (t_orig) { if (!t_read) { ok = false; @@ -1051,13 +1055,13 @@ static bool same_tensor_data(const struct ggml_context * orig, const struct ggml } t_orig = 
ggml_get_next_tensor(orig, t_orig); - t_read = ggml_get_next_tensor(orig, t_read); + t_read = ggml_get_next_tensor(read, t_read); } if (t_read) { ok = false; } - return true; + return ok; } static std::pair test_roundtrip(ggml_backend_dev_t dev, const unsigned int seed, const bool only_meta) { diff --git a/tests/test-sampling.cpp b/tests/test-sampling.cpp index c0dcb4848..61bd67850 100644 --- a/tests/test-sampling.cpp +++ b/tests/test-sampling.cpp @@ -144,7 +144,6 @@ static void test_penalties( sampler_tester tester(probs, probs_expected); - const size_t n_vocab = probs.size(); auto * sampler = llama_sampler_init_penalties(last_tokens.size(), repeat_penalty, alpha_frequency, alpha_presence); for (size_t i = 0; i < last_tokens.size(); i++) { From a1649cc13f89946322358f92ea268ae1b7b5096c Mon Sep 17 00:00:00 2001 From: Eric Curtin Date: Sat, 18 Jan 2025 14:42:31 +0000 Subject: [PATCH 6/9] Adding linenoise.cpp to llama-run (#11252) This is a fork of linenoise that is C++17 compatible. I intend on adding it to llama-run so we can do things like traverse prompt history via the up and down arrows: https://github.com/ericcurtin/linenoise.cpp Signed-off-by: Eric Curtin --- .github/workflows/build.yml | 4 + examples/run/CMakeLists.txt | 2 +- examples/run/linenoise.cpp/LICENSE | 26 + examples/run/linenoise.cpp/linenoise.cpp | 1351 ++++++++++++++++++++++ examples/run/linenoise.cpp/linenoise.h | 114 ++ examples/run/run.cpp | 38 +- 6 files changed, 1524 insertions(+), 11 deletions(-) create mode 100644 examples/run/linenoise.cpp/LICENSE create mode 100644 examples/run/linenoise.cpp/linenoise.cpp create mode 100644 examples/run/linenoise.cpp/linenoise.h diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 9e0c4a675..fe3b2cdfa 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -87,6 +87,7 @@ jobs: if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }} run: | 
cp LICENSE ./build/bin/ + cp examples/run/linenoise.cpp/LICENSE ./build/bin/LICENSE.linenoise.cpp zip -r llama-${{ steps.tag.outputs.name }}-bin-macos-arm64.zip ./build/bin/* - name: Upload artifacts @@ -149,6 +150,7 @@ jobs: if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }} run: | cp LICENSE ./build/bin/ + cp examples/run/linenoise.cpp/LICENSE ./build/bin/LICENSE.linenoise.cpp zip -r llama-${{ steps.tag.outputs.name }}-bin-macos-x64.zip ./build/bin/* - name: Upload artifacts @@ -217,6 +219,7 @@ jobs: if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }} run: | cp LICENSE ./build/bin/ + cp examples/run/linenoise.cpp/LICENSE ./build/bin/LICENSE.linenoise.cpp zip -r llama-${{ steps.tag.outputs.name }}-bin-ubuntu-x64.zip ./build/bin/* - name: Upload artifacts @@ -796,6 +799,7 @@ jobs: if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }} run: | Copy-Item LICENSE .\build\bin\Release\llama.cpp.txt + Copy-Item .\examples\run\linenoise.cpp\LICENSE .\build\bin\Release\linenoise.cpp.txt 7z a llama-${{ steps.tag.outputs.name }}-bin-win-${{ matrix.build }}.zip .\build\bin\Release\* - name: Upload artifacts diff --git a/examples/run/CMakeLists.txt b/examples/run/CMakeLists.txt index 0686d6305..cd6b0520e 100644 --- a/examples/run/CMakeLists.txt +++ b/examples/run/CMakeLists.txt @@ -1,5 +1,5 @@ set(TARGET llama-run) -add_executable(${TARGET} run.cpp) +add_executable(${TARGET} run.cpp linenoise.cpp/linenoise.cpp) install(TARGETS ${TARGET} RUNTIME) target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT}) target_compile_features(${TARGET} PRIVATE cxx_std_17) diff --git a/examples/run/linenoise.cpp/LICENSE b/examples/run/linenoise.cpp/LICENSE new file mode 100644 index 000000000..b006b3b24 --- /dev/null +++ 
b/examples/run/linenoise.cpp/LICENSE @@ -0,0 +1,26 @@ +Copyright (c) 2010-2014, Salvatore Sanfilippo +Copyright (c) 2010-2013, Pieter Noordhuis +Copyright (c) 2025, Eric Curtin + +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON +ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/examples/run/linenoise.cpp/linenoise.cpp b/examples/run/linenoise.cpp/linenoise.cpp new file mode 100644 index 000000000..050c23012 --- /dev/null +++ b/examples/run/linenoise.cpp/linenoise.cpp @@ -0,0 +1,1351 @@ +#ifndef _WIN32 +/* + * You can find the latest source code at: + * + * http://github.com/ericcurtin/linenoise.cpp + * + * Does a number of crazy assumptions that happen to be true in 99.9999% of + * the 2010 UNIX computers around. 
+ * + * ------------------------------------------------------------------------ + * + * Copyright (c) 2010-2023, Salvatore Sanfilippo + * Copyright (c) 2010-2013, Pieter Noordhuis + * Copyright (c) 2025, Eric Curtin + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ------------------------------------------------------------------------ + * + * References: + * - http://invisible-island.net/xterm/ctlseqs/ctlseqs.html + * - http://www.3waylabs.com/nw/WWW/products/wizcon/vt220.html + * + * Todo list: + * - Filter bogus Ctrl+ combinations. + * - Win32 support + * + * Bloat: + * - History search like Ctrl+r in readline? 
+ * + * List of escape sequences used by this program, we do everything just + * with three sequences. In order to be so cheap we may have some + * flickering effect with some slow terminal, but the fewer sequences + * the more compatible. + * + * EL (Erase Line) + * Sequence: ESC [ n K + * Effect: if n is 0 or missing, clear from cursor to end of line + * Effect: if n is 1, clear from beginning of line to cursor + * Effect: if n is 2, clear entire line + * + * CUF (CUrsor Forward) + * Sequence: ESC [ n C + * Effect: moves cursor forward n chars + * + * CUB (CUrsor Backward) + * Sequence: ESC [ n D + * Effect: moves cursor backward n chars + * + * The following is used to get the terminal width if getting + * the width with the TIOCGWINSZ ioctl fails + * + * DSR (Device Status Report) + * Sequence: ESC [ 6 n + * Effect: reports the current cursor position as ESC [ n ; m R + * where n is the row and m is the column + * + * When multi line mode is enabled, we also use an additional escape + * sequence. However multi line editing is disabled by default. + * + * CUU (Cursor Up) + * Sequence: ESC [ n A + * Effect: moves cursor up n lines. + * + * CUD (Cursor Down) + * Sequence: ESC [ n B + * Effect: moves cursor down n lines. + * + * When linenoiseClearScreen() is called, two additional escape sequences + * are used in order to clear the screen and position the cursor at home + * position. 
+ * + * CUP (Cursor position) + * Sequence: ESC [ H + * Effect: moves the cursor to upper left corner + * + * ED (Erase display) + * Sequence: ESC [ 2 J + * Effect: clear the whole screen + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "linenoise.h" + +#define LINENOISE_DEFAULT_HISTORY_MAX_LEN 100 +#define LINENOISE_MAX_LINE 4096 +static std::vector unsupported_term = {"dumb","cons25","emacs",nullptr}; +static linenoiseCompletionCallback *completionCallback = NULL; +static linenoiseHintsCallback *hintsCallback = NULL; +static linenoiseFreeHintsCallback *freeHintsCallback = NULL; +static char *linenoiseNoTTY(void); +static void refreshLineWithCompletion(struct linenoiseState *ls, linenoiseCompletions *lc, int flags); +static void refreshLineWithFlags(struct linenoiseState *l, int flags); + +static struct termios orig_termios; /* In order to restore at exit.*/ +static int maskmode = 0; /* Show "***" instead of input. For passwords. */ +static int rawmode = 0; /* For atexit() function to check if restore is needed*/ +static int mlmode = 0; /* Multi line mode. Default is single line. */ +static int atexit_registered = 0; /* Register atexit just 1 time. 
*/ +static int history_max_len = LINENOISE_DEFAULT_HISTORY_MAX_LEN; +static int history_len = 0; +static char **history = NULL; + +enum KEY_ACTION{ + KEY_NULL = 0, /* NULL */ + CTRL_A = 1, /* Ctrl+a */ + CTRL_B = 2, /* Ctrl-b */ + CTRL_C = 3, /* Ctrl-c */ + CTRL_D = 4, /* Ctrl-d */ + CTRL_E = 5, /* Ctrl-e */ + CTRL_F = 6, /* Ctrl-f */ + CTRL_H = 8, /* Ctrl-h */ + TAB = 9, /* Tab */ + CTRL_K = 11, /* Ctrl+k */ + CTRL_L = 12, /* Ctrl+l */ + ENTER = 13, /* Enter */ + CTRL_N = 14, /* Ctrl-n */ + CTRL_P = 16, /* Ctrl-p */ + CTRL_T = 20, /* Ctrl-t */ + CTRL_U = 21, /* Ctrl+u */ + CTRL_W = 23, /* Ctrl+w */ + ESC = 27, /* Escape */ + BACKSPACE = 127 /* Backspace */ +}; + +static void linenoiseAtExit(void); +int linenoiseHistoryAdd(const char *line); +#define REFRESH_CLEAN (1<<0) // Clean the old prompt from the screen +#define REFRESH_WRITE (1<<1) // Rewrite the prompt on the screen. +#define REFRESH_ALL (REFRESH_CLEAN|REFRESH_WRITE) // Do both. +static void refreshLine(struct linenoiseState *l); + +__attribute__((format(printf, 1, 2))) +/* Debugging function. */ +#if 0 +static void lndebug(const char *fmt, ...) { + static FILE *lndebug_fp = NULL; + if (lndebug_fp == NULL) { + lndebug_fp = fopen("/tmp/lndebug.txt", "a"); + } + + if (lndebug_fp != NULL) { + va_list args; + va_start(args, fmt); + vfprintf(lndebug_fp, fmt, args); + va_end(args); + fflush(lndebug_fp); + } +} +#else +static void lndebug(const char *, ...) { +} +#endif + +/* ======================= Low level terminal handling ====================== */ + +/* Enable "mask mode". When it is enabled, instead of the input that + * the user is typing, the terminal will just display a corresponding + * number of asterisks, like "****". This is useful for passwords and other + * secrets that should not be displayed. */ +void linenoiseMaskModeEnable(void) { + maskmode = 1; +} + +/* Disable mask mode. */ +void linenoiseMaskModeDisable(void) { + maskmode = 0; +} + +/* Set if to use or not the multi line mode. 
*/ +void linenoiseSetMultiLine(int ml) { + mlmode = ml; +} + +/* Return true if the terminal name is in the list of terminals we know are + * not able to understand basic escape sequences. */ +static int isUnsupportedTerm(void) { + char *term = getenv("TERM"); + if (term == NULL) return 0; + for (int j = 0; unsupported_term[j]; ++j) + if (!strcasecmp(term, unsupported_term[j])) return 1; + return 0; +} + +/* Raw mode: 1960 magic shit. */ +static int enableRawMode(int fd) { + struct termios raw; + + if (!isatty(STDIN_FILENO)) goto fatal; + if (!atexit_registered) { + atexit(linenoiseAtExit); + atexit_registered = 1; + } + if (tcgetattr(fd,&orig_termios) == -1) goto fatal; + + raw = orig_termios; /* modify the original mode */ + /* input modes: no break, no CR to NL, no parity check, no strip char, + * no start/stop output control. */ + raw.c_iflag &= ~(BRKINT | ICRNL | INPCK | ISTRIP | IXON); + /* output modes - disable post processing */ + raw.c_oflag &= ~(OPOST); + /* control modes - set 8 bit chars */ + raw.c_cflag |= (CS8); + /* local modes - choing off, canonical off, no extended functions, + * no signal chars (^Z,^C) */ + raw.c_lflag &= ~(ECHO | ICANON | IEXTEN | ISIG); + /* control chars - set return condition: min number of bytes and timer. + * We want read to return every single byte, without timeout. */ + raw.c_cc[VMIN] = 1; raw.c_cc[VTIME] = 0; /* 1 byte, no timer */ + + /* put terminal in raw mode after flushing */ + if (tcsetattr(fd,TCSAFLUSH,&raw) < 0) goto fatal; + rawmode = 1; + return 0; + +fatal: + errno = ENOTTY; + return -1; +} + +static void disableRawMode(int fd) { + /* Don't even check the return value as it's too late. */ + if (rawmode && tcsetattr(fd,TCSAFLUSH,&orig_termios) != -1) + rawmode = 0; +} + +/* Use the ESC [6n escape sequence to query the horizontal cursor position + * and return it. On error -1 is returned, on success the position of the + * cursor. 
*/ +static int getCursorPosition(int ifd, int ofd) { + char buf[32]; + int cols, rows; + unsigned int i = 0; + + /* Report cursor location */ + if (write(ofd, "\x1b[6n", 4) != 4) return -1; + + /* Read the response: ESC [ rows ; cols R */ + while (i < sizeof(buf)-1) { + if (read(ifd,buf+i,1) != 1) break; + if (buf[i] == 'R') break; + i++; + } + buf[i] = '\0'; + + /* Parse it. */ + if (buf[0] != ESC || buf[1] != '[') return -1; + if (sscanf(buf+2,"%d;%d",&rows,&cols) != 2) return -1; + return cols; +} + +/* Try to get the number of columns in the current terminal, or assume 80 + * if it fails. */ +static int getColumns(int ifd, int ofd) { + struct winsize ws; + + if (ioctl(1, TIOCGWINSZ, &ws) == -1 || ws.ws_col == 0) { + /* ioctl() failed. Try to query the terminal itself. */ + int start, cols; + + /* Get the initial position so we can restore it later. */ + start = getCursorPosition(ifd,ofd); + if (start == -1) goto failed; + + /* Go to right margin and get position. */ + if (write(ofd,"\x1b[999C",6) != 6) goto failed; + cols = getCursorPosition(ifd,ofd); + if (cols == -1) goto failed; + + /* Restore position. */ + if (cols > start) { + char seq[32]; + snprintf(seq,32,"\x1b[%dD",cols-start); + if (write(ofd,seq,strlen(seq)) == -1) { + /* Can't recover... */ + } + } + return cols; + } else { + return ws.ws_col; + } + +failed: + return 80; +} + +/* Clear the screen. Used to handle ctrl+l */ +void linenoiseClearScreen(void) { + if (write(STDOUT_FILENO,"\x1b[H\x1b[2J",7) <= 0) { + /* nothing to do, just to avoid warning. */ + } +} + +/* Beep, used for completion when there is nothing to complete or when all + * the choices were already shown. */ +static void linenoiseBeep(void) { + fprintf(stderr, "\x7"); + fflush(stderr); +} + +/* ============================== Completion ================================ */ + +/* Free a list of completion option populated by linenoiseAddCompletion(). 
*/ +static void freeCompletions(linenoiseCompletions *lc) { + size_t i; + for (i = 0; i < lc->len; i++) + free(lc->cvec[i]); + if (lc->cvec != NULL) + free(lc->cvec); +} + +/* Called by completeLine() and linenoiseShow() to render the current + * edited line with the proposed completion. If the current completion table + * is already available, it is passed as second argument, otherwise the + * function will use the callback to obtain it. + * + * Flags are the same as refreshLine*(), that is REFRESH_* macros. */ +static void refreshLineWithCompletion(struct linenoiseState *ls, linenoiseCompletions *lc, int flags) { + /* Obtain the table of completions if the caller didn't provide one. */ + linenoiseCompletions ctable = { 0, NULL }; + if (lc == NULL) { + completionCallback(ls->buf,&ctable); + lc = &ctable; + } + + /* Show the edited line with completion if possible, or just refresh. */ + if (ls->completion_idx < lc->len) { + struct linenoiseState saved = *ls; + ls->len = ls->pos = strlen(lc->cvec[ls->completion_idx]); + ls->buf = lc->cvec[ls->completion_idx]; + refreshLineWithFlags(ls,flags); + ls->len = saved.len; + ls->pos = saved.pos; + ls->buf = saved.buf; + } else { + refreshLineWithFlags(ls,flags); + } + + /* Free the completions table if needed. */ + if (lc != &ctable) freeCompletions(&ctable); +} + +/* This is an helper function for linenoiseEdit*() and is called when the + * user types the key in order to complete the string currently in the + * input. + * + * The state of the editing is encapsulated into the pointed linenoiseState + * structure as described in the structure definition. + * + * If the function returns non-zero, the caller should handle the + * returned value as a byte read from the standard input, and process + * it as usually: this basically means that the function may return a byte + * read from the termianl but not processed. 
Otherwise, if zero is returned, + * the input was consumed by the completeLine() function to navigate the + * possible completions, and the caller should read for the next characters + * from stdin. */ +static int completeLine(struct linenoiseState *ls, int keypressed) { + linenoiseCompletions lc = { 0, NULL }; + int nwritten; + char c = keypressed; + + completionCallback(ls->buf,&lc); + if (lc.len == 0) { + linenoiseBeep(); + ls->in_completion = 0; + } else { + switch(c) { + case 9: /* tab */ + if (ls->in_completion == 0) { + ls->in_completion = 1; + ls->completion_idx = 0; + } else { + ls->completion_idx = (ls->completion_idx+1) % (lc.len+1); + if (ls->completion_idx == lc.len) linenoiseBeep(); + } + c = 0; + break; + case 27: /* escape */ + /* Re-show original buffer */ + if (ls->completion_idx < lc.len) refreshLine(ls); + ls->in_completion = 0; + c = 0; + break; + default: + /* Update buffer and return */ + if (ls->completion_idx < lc.len) { + nwritten = snprintf(ls->buf,ls->buflen,"%s", + lc.cvec[ls->completion_idx]); + ls->len = ls->pos = nwritten; + } + ls->in_completion = 0; + break; + } + + /* Show completion or original buffer */ + if (ls->in_completion && ls->completion_idx < lc.len) { + refreshLineWithCompletion(ls,&lc,REFRESH_ALL); + } else { + refreshLine(ls); + } + } + + freeCompletions(&lc); + return c; /* Return last read character */ +} + +/* Register a callback function to be called for tab-completion. */ +void linenoiseSetCompletionCallback(linenoiseCompletionCallback *fn) { + completionCallback = fn; +} + +/* Register a hits function to be called to show hits to the user at the + * right of the prompt. */ +void linenoiseSetHintsCallback(linenoiseHintsCallback *fn) { + hintsCallback = fn; +} + +/* Register a function to free the hints returned by the hints callback + * registered with linenoiseSetHintsCallback(). 
*/ +void linenoiseSetFreeHintsCallback(linenoiseFreeHintsCallback *fn) { + freeHintsCallback = fn; +} + +/* This function is used by the callback function registered by the user + * in order to add completion options given the input string when the + * user typed . See the example.c source code for a very easy to + * understand example. */ +void linenoiseAddCompletion(linenoiseCompletions *lc, const char *str) { + size_t len = strlen(str); + char *copy, **cvec; + + copy = (char*) malloc(len + 1); + if (copy == NULL) return; + memcpy(copy,str,len+1); + cvec = (char**) realloc(lc->cvec,sizeof(char*)*(lc->len+1)); + if (cvec == NULL) { + free(copy); + return; + } + lc->cvec = cvec; + lc->cvec[lc->len++] = copy; +} + +/* =========================== Line editing ================================= */ + +/* We define a very simple "append buffer" structure, that is an heap + * allocated string where we can append to. This is useful in order to + * write all the escape sequences in a buffer and flush them to the standard + * output in a single call, to avoid flickering effects. */ +struct abuf { + char *b; + int len; +}; + +static void abInit(struct abuf *ab) { + ab->b = NULL; + ab->len = 0; +} + +static void abAppend(struct abuf *ab, const char *s, int len) { + char *new_ptr = (char*) realloc(ab->b,ab->len+len); + + if (new_ptr == NULL) return; + memcpy(new_ptr+ab->len,s,len); + ab->b = new_ptr; + ab->len += len; +} + +static void abFree(struct abuf *ab) { + free(ab->b); +} + +/* Helper of refreshSingleLine() and refreshMultiLine() to show hints + * to the right of the prompt. 
*/ +static void refreshShowHints(struct abuf * ab, struct linenoiseState * l, int plen) { + char seq[64]; + if (hintsCallback && plen+l->len < l->cols) { + int color = -1, bold = 0; + const char *hint = hintsCallback(l->buf,&color,&bold); + if (hint) { + int hintlen = strlen(hint); + int hintmaxlen = l->cols-(plen+l->len); + if (hintlen > hintmaxlen) hintlen = hintmaxlen; + if (bold == 1 && color == -1) color = 37; + if (color != -1 || bold != 0) + snprintf(seq,64,"\033[%d;%d;49m",bold,color); + else + seq[0] = '\0'; + abAppend(ab,seq,strlen(seq)); + abAppend(ab,hint,hintlen); + if (color != -1 || bold != 0) + abAppend(ab,"\033[0m",4); + /* Call the function to free the hint returned. */ + if (freeHintsCallback) freeHintsCallback(hint); + } + } +} + +/* Single line low level line refresh. + * + * Rewrite the currently edited line accordingly to the buffer content, + * cursor position, and number of columns of the terminal. + * + * Flags is REFRESH_* macros. The function can just remove the old + * prompt, just write it, or both. */ +static void refreshSingleLine(struct linenoiseState *l, int flags) { + char seq[64]; + size_t plen = strlen(l->prompt); + int fd = l->ofd; + char *buf = l->buf; + size_t len = l->len; + size_t pos = l->pos; + struct abuf ab; + + while((plen+pos) >= l->cols) { + buf++; + len--; + pos--; + } + while (plen+len > l->cols) { + len--; + } + + abInit(&ab); + /* Cursor to left edge */ + snprintf(seq,sizeof(seq),"\r"); + abAppend(&ab,seq,strlen(seq)); + + if (flags & REFRESH_WRITE) { + /* Write the prompt and the current buffer content */ + abAppend(&ab,l->prompt,strlen(l->prompt)); + if (maskmode == 1) { + while (len--) abAppend(&ab,"*",1); + } else { + abAppend(&ab,buf,len); + } + /* Show hits if any. */ + refreshShowHints(&ab,l,plen); + } + + /* Erase to right */ + snprintf(seq,sizeof(seq),"\x1b[0K"); + abAppend(&ab,seq,strlen(seq)); + + if (flags & REFRESH_WRITE) { + /* Move cursor to original position. 
*/ + snprintf(seq,sizeof(seq),"\r\x1b[%dC", (int)(pos+plen)); + abAppend(&ab,seq,strlen(seq)); + } + + if (write(fd,ab.b,ab.len) == -1) {} /* Can't recover from write error. */ + abFree(&ab); +} + +/* Multi line low level line refresh. + * + * Rewrite the currently edited line accordingly to the buffer content, + * cursor position, and number of columns of the terminal. + * + * Flags is REFRESH_* macros. The function can just remove the old + * prompt, just write it, or both. */ +static void refreshMultiLine(struct linenoiseState *l, int flags) { + char seq[64]; + int plen = strlen(l->prompt); + int rows = (plen+l->len+l->cols-1)/l->cols; /* rows used by current buf. */ + int rpos = (plen+l->oldpos+l->cols)/l->cols; /* cursor relative row. */ + int rpos2; /* rpos after refresh. */ + int col; /* colum position, zero-based. */ + int old_rows = l->oldrows; + int fd = l->ofd, j; + struct abuf ab; + + l->oldrows = rows; + + /* First step: clear all the lines used before. To do so start by + * going to the last row. */ + abInit(&ab); + + if (flags & REFRESH_CLEAN) { + if (old_rows-rpos > 0) { + lndebug("go down %d", old_rows-rpos); + snprintf(seq,64,"\x1b[%dB", old_rows-rpos); + abAppend(&ab,seq,strlen(seq)); + } + + /* Now for every row clear it, go up. */ + for (j = 0; j < old_rows-1; j++) { + lndebug("clear+up"); + snprintf(seq,64,"\r\x1b[0K\x1b[1A"); + abAppend(&ab,seq,strlen(seq)); + } + } + + if (flags & REFRESH_ALL) { + /* Clean the top line. */ + lndebug("clear"); + snprintf(seq,64,"\r\x1b[0K"); + abAppend(&ab,seq,strlen(seq)); + } + + if (flags & REFRESH_WRITE) { + /* Write the prompt and the current buffer content */ + abAppend(&ab,l->prompt,strlen(l->prompt)); + if (maskmode == 1) { + unsigned int i; + for (i = 0; i < l->len; i++) abAppend(&ab,"*",1); + } else { + abAppend(&ab,l->buf,l->len); + } + + /* Show hits if any. 
*/ + refreshShowHints(&ab,l,plen); + + /* If we are at the very end of the screen with our prompt, we need to + * emit a newline and move the prompt to the first column. */ + if (l->pos && + l->pos == l->len && + (l->pos+plen) % l->cols == 0) + { + lndebug(""); + abAppend(&ab,"\n",1); + snprintf(seq,64,"\r"); + abAppend(&ab,seq,strlen(seq)); + rows++; + if (rows > (int)l->oldrows) l->oldrows = rows; + } + + /* Move cursor to right position. */ + rpos2 = (plen+l->pos+l->cols)/l->cols; /* Current cursor relative row */ + lndebug("rpos2 %d", rpos2); + + /* Go up till we reach the expected positon. */ + if (rows-rpos2 > 0) { + lndebug("go-up %d", rows-rpos2); + snprintf(seq,64,"\x1b[%dA", rows-rpos2); + abAppend(&ab,seq,strlen(seq)); + } + + /* Set column. */ + col = (plen+(int)l->pos) % (int)l->cols; + lndebug("set col %d", 1+col); + if (col) + snprintf(seq,64,"\r\x1b[%dC", col); + else + snprintf(seq,64,"\r"); + abAppend(&ab,seq,strlen(seq)); + } + + lndebug("\n"); + l->oldpos = l->pos; + + if (write(fd,ab.b,ab.len) == -1) {} /* Can't recover from write error. */ + abFree(&ab); +} + +/* Calls the two low level functions refreshSingleLine() or + * refreshMultiLine() according to the selected mode. */ +static void refreshLineWithFlags(struct linenoiseState *l, int flags) { + if (mlmode) + refreshMultiLine(l,flags); + else + refreshSingleLine(l,flags); +} + +/* Utility function to avoid specifying REFRESH_ALL all the times. */ +static void refreshLine(struct linenoiseState *l) { + refreshLineWithFlags(l,REFRESH_ALL); +} + +/* Hide the current line, when using the multiplexing API. */ +void linenoiseHide(struct linenoiseState *l) { + if (mlmode) + refreshMultiLine(l,REFRESH_CLEAN); + else + refreshSingleLine(l,REFRESH_CLEAN); +} + +/* Show the current line, when using the multiplexing API. 
*/ +void linenoiseShow(struct linenoiseState *l) { + if (l->in_completion) { + refreshLineWithCompletion(l,NULL,REFRESH_WRITE); + } else { + refreshLineWithFlags(l,REFRESH_WRITE); + } +} + +/* Insert the character 'c' at cursor current position. + * + * On error writing to the terminal -1 is returned, otherwise 0. */ +static int linenoiseEditInsert(struct linenoiseState * l, char c) { + if (l->len < l->buflen) { + if (l->len == l->pos) { + l->buf[l->pos] = c; + l->pos++; + l->len++; + l->buf[l->len] = '\0'; + if ((!mlmode && l->plen+l->len < l->cols && !hintsCallback)) { + /* Avoid a full update of the line in the + * trivial case. */ + char d = (maskmode==1) ? '*' : c; + if (write(l->ofd,&d,1) == -1) return -1; + } else { + refreshLine(l); + } + } else { + memmove(l->buf+l->pos+1,l->buf+l->pos,l->len-l->pos); + l->buf[l->pos] = c; + l->len++; + l->pos++; + l->buf[l->len] = '\0'; + refreshLine(l); + } + } + return 0; +} + +/* Move cursor on the left. */ +static void linenoiseEditMoveLeft(struct linenoiseState * l) { + if (l->pos > 0) { + l->pos--; + refreshLine(l); + } +} + +/* Move cursor on the right. */ +static void linenoiseEditMoveRight(struct linenoiseState * l) { + if (l->pos != l->len) { + l->pos++; + refreshLine(l); + } +} + +/* Move cursor to the start of the line. */ +static void linenoiseEditMoveHome(struct linenoiseState * l) { + if (l->pos != 0) { + l->pos = 0; + refreshLine(l); + } +} + +/* Move cursor to the end of the line. */ +static void linenoiseEditMoveEnd(struct linenoiseState * l) { + if (l->pos != l->len) { + l->pos = l->len; + refreshLine(l); + } +} + +/* Substitute the currently edited line with the next or previous history + * entry as specified by 'dir'. */ +#define LINENOISE_HISTORY_NEXT 0 +#define LINENOISE_HISTORY_PREV 1 + +static void linenoiseEditHistoryNext(struct linenoiseState * l, int dir) { + if (history_len > 1) { + /* Update the current history entry before to + * overwrite it with the next one. 
*/ + free(history[history_len - 1 - l->history_index]); + history[history_len - 1 - l->history_index] = strdup(l->buf); + /* Show the new entry */ + l->history_index += (dir == LINENOISE_HISTORY_PREV) ? 1 : -1; + if (l->history_index < 0) { + l->history_index = 0; + return; + } else if (l->history_index >= history_len) { + l->history_index = history_len-1; + return; + } + strncpy(l->buf,history[history_len - 1 - l->history_index],l->buflen); + l->buf[l->buflen-1] = '\0'; + l->len = l->pos = strlen(l->buf); + refreshLine(l); + } +} + +/* Delete the character at the right of the cursor without altering the cursor + * position. Basically this is what happens with the "Delete" keyboard key. */ +static void linenoiseEditDelete(struct linenoiseState * l) { + if (l->len > 0 && l->pos < l->len) { + memmove(l->buf+l->pos,l->buf+l->pos+1,l->len-l->pos-1); + l->len--; + l->buf[l->len] = '\0'; + refreshLine(l); + } +} + +/* Backspace implementation. */ +static void linenoiseEditBackspace(struct linenoiseState * l) { + if (l->pos > 0 && l->len > 0) { + memmove(l->buf+l->pos-1,l->buf+l->pos,l->len-l->pos); + l->pos--; + l->len--; + l->buf[l->len] = '\0'; + refreshLine(l); + } +} + +/* Delete the previosu word, maintaining the cursor at the start of the + * current word. */ +static void linenoiseEditDeletePrevWord(struct linenoiseState * l) { + size_t old_pos = l->pos; + size_t diff; + + while (l->pos > 0 && l->buf[l->pos-1] == ' ') + l->pos--; + while (l->pos > 0 && l->buf[l->pos-1] != ' ') + l->pos--; + diff = old_pos - l->pos; + memmove(l->buf+l->pos,l->buf+old_pos,l->len-old_pos+1); + l->len -= diff; + refreshLine(l); +} + +/* This function is part of the multiplexed API of Linenoise, that is used + * in order to implement the blocking variant of the API but can also be + * called by the user directly in an event driven program. It will: + * + * 1. Initialize the linenoise state passed by the user. + * 2. Put the terminal in RAW mode. + * 3. Show the prompt. + * 4. 
Return control to the user, that will have to call linenoiseEditFeed() + * each time there is some data arriving in the standard input. + * + * The user can also call linenoiseEditHide() and linenoiseEditShow() if it + * is required to show some input arriving asyncronously, without mixing + * it with the currently edited line. + * + * When linenoiseEditFeed() returns non-NULL, the user finished with the + * line editing session (pressed enter CTRL-D/C): in this case the caller + * needs to call linenoiseEditStop() to put back the terminal in normal + * mode. This will not destroy the buffer, as long as the linenoiseState + * is still valid in the context of the caller. + * + * The function returns 0 on success, or -1 if writing to standard output + * fails. If stdin_fd or stdout_fd are set to -1, the default is to use + * STDIN_FILENO and STDOUT_FILENO. + */ +int linenoiseEditStart(struct linenoiseState *l, int stdin_fd, int stdout_fd, char *buf, size_t buflen, const char *prompt) { + /* Populate the linenoise state that we pass to functions implementing + * specific editing functionalities. */ + l->in_completion = 0; + l->ifd = stdin_fd != -1 ? stdin_fd : STDIN_FILENO; + l->ofd = stdout_fd != -1 ? stdout_fd : STDOUT_FILENO; + l->buf = buf; + l->buflen = buflen; + l->prompt = prompt; + l->plen = strlen(prompt); + l->oldpos = l->pos = 0; + l->len = 0; + + /* Enter raw mode. */ + if (enableRawMode(l->ifd) == -1) return -1; + + l->cols = getColumns(stdin_fd, stdout_fd); + l->oldrows = 0; + l->history_index = 0; + + /* Buffer starts empty. */ + l->buf[0] = '\0'; + l->buflen--; /* Make sure there is always space for the nulterm */ + + /* If stdin is not a tty, stop here with the initialization. We + * will actually just read a line from standard input in blocking + * mode later, in linenoiseEditFeed(). */ + if (!isatty(l->ifd)) return 0; + + /* The latest history entry is always our current buffer, that + * initially is just an empty string. 
*/ + linenoiseHistoryAdd(""); + + if (write(l->ofd,prompt,l->plen) == -1) return -1; + return 0; +} + +const char* linenoiseEditMore = "If you see this, you are misusing the API: when linenoiseEditFeed() is called, if it returns linenoiseEditMore the user is yet editing the line. See the README file for more information."; + +/* This function is part of the multiplexed API of linenoise, see the top + * comment on linenoiseEditStart() for more information. Call this function + * each time there is some data to read from the standard input file + * descriptor. In the case of blocking operations, this function can just be + * called in a loop, and block. + * + * The function returns linenoiseEditMore to signal that line editing is still + * in progress, that is, the user didn't yet pressed enter / CTRL-D. Otherwise + * the function returns the pointer to the heap-allocated buffer with the + * edited line, that the user should free with linenoiseFree(). + * + * On special conditions, NULL is returned and errno is populated: + * + * EAGAIN if the user pressed Ctrl-C + * ENOENT if the user pressed Ctrl-D + * + * Some other errno: I/O error. + */ +const char *linenoiseEditFeed(struct linenoiseState *l) { + /* Not a TTY, pass control to line reading without character + * count limits. */ + if (!isatty(l->ifd)) return linenoiseNoTTY(); + + char c; + int nread; + char seq[3]; + + nread = read(l->ifd,&c,1); + if (nread <= 0) return NULL; + + /* Only autocomplete when the callback is set. It returns < 0 when + * there was an error reading from fd. Otherwise it will return the + * character that should be handled next. 
*/ + if ((l->in_completion || c == 9) && completionCallback != NULL) { + c = completeLine(l,c); + /* Read next character when 0 */ + if (c == 0) return linenoiseEditMore; + } + + switch(c) { + case ENTER: /* enter */ + history_len--; + free(history[history_len]); + if (mlmode) linenoiseEditMoveEnd(l); + if (hintsCallback) { + /* Force a refresh without hints to leave the previous + * line as the user typed it after a newline. */ + linenoiseHintsCallback *hc = hintsCallback; + hintsCallback = NULL; + refreshLine(l); + hintsCallback = hc; + } + return strdup(l->buf); + case CTRL_C: /* ctrl-c */ + errno = EAGAIN; + return NULL; + case BACKSPACE: /* backspace */ + case 8: /* ctrl-h */ + linenoiseEditBackspace(l); + break; + case CTRL_D: /* ctrl-d, remove char at right of cursor, or if the + line is empty, act as end-of-file. */ + if (l->len > 0) { + linenoiseEditDelete(l); + } else { + history_len--; + free(history[history_len]); + errno = ENOENT; + return NULL; + } + break; + case CTRL_T: /* ctrl-t, swaps current character with previous. */ + if (l->pos > 0 && l->pos < l->len) { + int aux = l->buf[l->pos-1]; + l->buf[l->pos-1] = l->buf[l->pos]; + l->buf[l->pos] = aux; + if (l->pos != l->len-1) l->pos++; + refreshLine(l); + } + break; + case CTRL_B: /* ctrl-b */ + linenoiseEditMoveLeft(l); + break; + case CTRL_F: /* ctrl-f */ + linenoiseEditMoveRight(l); + break; + case CTRL_P: /* ctrl-p */ + linenoiseEditHistoryNext(l, LINENOISE_HISTORY_PREV); + break; + case CTRL_N: /* ctrl-n */ + linenoiseEditHistoryNext(l, LINENOISE_HISTORY_NEXT); + break; + case ESC: /* escape sequence */ + /* Read the next two bytes representing the escape sequence. + * Use two calls to handle slow terminals returning the two + * chars at different times. */ + if (read(l->ifd,seq,1) == -1) break; + if (read(l->ifd,seq+1,1) == -1) break; + + /* ESC [ sequences. */ + if (seq[0] == '[') { + if (seq[1] >= '0' && seq[1] <= '9') { + /* Extended escape, read additional byte. 
*/ + if (read(l->ifd,seq+2,1) == -1) break; + if (seq[2] == '~') { + switch(seq[1]) { + case '3': /* Delete key. */ + linenoiseEditDelete(l); + break; + } + } + } else { + switch(seq[1]) { + case 'A': /* Up */ + linenoiseEditHistoryNext(l, LINENOISE_HISTORY_PREV); + break; + case 'B': /* Down */ + linenoiseEditHistoryNext(l, LINENOISE_HISTORY_NEXT); + break; + case 'C': /* Right */ + linenoiseEditMoveRight(l); + break; + case 'D': /* Left */ + linenoiseEditMoveLeft(l); + break; + case 'H': /* Home */ + linenoiseEditMoveHome(l); + break; + case 'F': /* End*/ + linenoiseEditMoveEnd(l); + break; + } + } + } + + /* ESC O sequences. */ + else if (seq[0] == 'O') { + switch(seq[1]) { + case 'H': /* Home */ + linenoiseEditMoveHome(l); + break; + case 'F': /* End*/ + linenoiseEditMoveEnd(l); + break; + } + } + break; + default: + if (linenoiseEditInsert(l,c)) return NULL; + break; + case CTRL_U: /* Ctrl+u, delete the whole line. */ + l->buf[0] = '\0'; + l->pos = l->len = 0; + refreshLine(l); + break; + case CTRL_K: /* Ctrl+k, delete from current to end of line. */ + l->buf[l->pos] = '\0'; + l->len = l->pos; + refreshLine(l); + break; + case CTRL_A: /* Ctrl+a, go to the start of the line */ + linenoiseEditMoveHome(l); + break; + case CTRL_E: /* ctrl+e, go to the end of the line */ + linenoiseEditMoveEnd(l); + break; + case CTRL_L: /* ctrl+l, clear screen */ + linenoiseClearScreen(); + refreshLine(l); + break; + case CTRL_W: /* ctrl+w, delete previous word */ + linenoiseEditDeletePrevWord(l); + break; + } + return linenoiseEditMore; +} + +/* This is part of the multiplexed linenoise API. See linenoiseEditStart() + * for more information. This function is called when linenoiseEditFeed() + * returns something different than NULL. At this point the user input + * is in the buffer, and we can restore the terminal in normal mode. 
*/ +void linenoiseEditStop(struct linenoiseState *l) { + if (!isatty(l->ifd)) return; + disableRawMode(l->ifd); + printf("\n"); +} + +/* This just implements a blocking loop for the multiplexed API. + * In many applications that are not event-drivern, we can just call + * the blocking linenoise API, wait for the user to complete the editing + * and return the buffer. */ +static const char *linenoiseBlockingEdit(int stdin_fd, int stdout_fd, char *buf, size_t buflen, const char *prompt) +{ + struct linenoiseState l; + + /* Editing without a buffer is invalid. */ + if (buflen == 0) { + errno = EINVAL; + return NULL; + } + + linenoiseEditStart(&l,stdin_fd,stdout_fd,buf,buflen,prompt); + const char *res; + while((res = linenoiseEditFeed(&l)) == linenoiseEditMore); + linenoiseEditStop(&l); + return res; +} + +/* This special mode is used by linenoise in order to print scan codes + * on screen for debugging / development purposes. It is implemented + * by the linenoise_example program using the --keycodes option. */ +void linenoisePrintKeyCodes(void) { + char quit[4]; + + printf("Linenoise key codes debugging mode.\n" + "Press keys to see scan codes. Type 'quit' at any time to exit.\n"); + if (enableRawMode(STDIN_FILENO) == -1) return; + memset(quit,' ',4); + while(1) { + char c; + int nread; + + nread = read(STDIN_FILENO,&c,1); + if (nread <= 0) continue; + memmove(quit,quit+1,sizeof(quit)-1); /* shift string to left. */ + quit[sizeof(quit)-1] = c; /* Insert current char on the right. */ + if (memcmp(quit,"quit",sizeof(quit)) == 0) break; + + printf("'%c' %02x (%d) (type quit to exit)\n", + isprint(c) ? c : '?', (int)c, (int)c); + printf("\r"); /* Go left edge manually, we are in raw mode. */ + fflush(stdout); + } + disableRawMode(STDIN_FILENO); +} + +/* This function is called when linenoise() is called with the standard + * input file descriptor not attached to a TTY. 
So for example when the + * program using linenoise is called in pipe or with a file redirected + * to its standard input. In this case, we want to be able to return the + * line regardless of its length (by default we are limited to 4k). */ +static char *linenoiseNoTTY(void) { + char *line = NULL; + size_t len = 0, maxlen = 0; + + while(1) { + if (len == maxlen) { + if (maxlen == 0) maxlen = 16; + maxlen *= 2; + char *oldval = line; + line = (char*) realloc(line,maxlen); + if (line == NULL) { + if (oldval) free(oldval); + return NULL; + } + } + int c = fgetc(stdin); + if (c == EOF || c == '\n') { + if (c == EOF && len == 0) { + free(line); + return NULL; + } else { + line[len] = '\0'; + return line; + } + } else { + line[len] = c; + len++; + } + } +} + +/* The high level function that is the main API of the linenoise library. + * This function checks if the terminal has basic capabilities, just checking + * for a blacklist of stupid terminals, and later either calls the line + * editing function or uses dummy fgets() so that you will be able to type + * something even in the most desperate of the conditions. */ +const char *linenoise(const char *prompt) { + char buf[LINENOISE_MAX_LINE]; + + if (!isatty(STDIN_FILENO)) { + /* Not a tty: read from file / pipe. In this mode we don't want any + * limit to the line size, so we call a function to handle that. 
*/ + return linenoiseNoTTY(); + } else if (isUnsupportedTerm()) { + size_t len; + + printf("%s",prompt); + fflush(stdout); + if (fgets(buf,LINENOISE_MAX_LINE,stdin) == NULL) return NULL; + len = strlen(buf); + while(len && (buf[len-1] == '\n' || buf[len-1] == '\r')) { + len--; + buf[len] = '\0'; + } + return strdup(buf); + } else { + const char *retval = linenoiseBlockingEdit(STDIN_FILENO,STDOUT_FILENO,buf,LINENOISE_MAX_LINE,prompt); + return retval; + } +} + +/* This is just a wrapper the user may want to call in order to make sure + * the linenoise returned buffer is freed with the same allocator it was + * created with. Useful when the main program is using an alternative + * allocator. */ +void linenoiseFree(void *ptr) { + if (ptr == linenoiseEditMore) return; // Protect from API misuse. + free(ptr); +} + +/* ================================ History ================================= */ + +/* Free the history, but does not reset it. Only used when we have to + * exit() to avoid memory leaks are reported by valgrind & co. */ +static void freeHistory(void) { + if (history) { + int j; + + for (j = 0; j < history_len; j++) + free(history[j]); + free(history); + } +} + +/* At exit we'll try to fix the terminal to the initial conditions. */ +static void linenoiseAtExit(void) { + disableRawMode(STDIN_FILENO); + freeHistory(); +} + +/* This is the API call to add a new entry in the linenoise history. + * It uses a fixed array of char pointers that are shifted (memmoved) + * when the history max length is reached in order to remove the older + * entry and make room for the new one, so it is not exactly suitable for huge + * histories, but will work well for a few hundred of entries. + * + * Using a circular buffer is smarter, but a bit more complex to handle. */ +int linenoiseHistoryAdd(const char *line) { + char *linecopy; + + if (history_max_len == 0) return 0; + + /* Initialization on first call. 
*/ + if (history == NULL) { + history = (char**) malloc(sizeof(char*)*history_max_len); + if (history == NULL) return 0; + memset(history,0,(sizeof(char*)*history_max_len)); + } + + /* Don't add duplicated lines. */ + if (history_len && !strcmp(history[history_len-1], line)) return 0; + + /* Add an heap allocated copy of the line in the history. + * If we reached the max length, remove the older line. */ + linecopy = strdup(line); + if (!linecopy) return 0; + if (history_len == history_max_len) { + free(history[0]); + memmove(history,history+1,sizeof(char*)*(history_max_len-1)); + history_len--; + } + history[history_len] = linecopy; + history_len++; + return 1; +} + +/* Set the maximum length for the history. This function can be called even + * if there is already some history, the function will make sure to retain + * just the latest 'len' elements if the new history length value is smaller + * than the amount of items already inside the history. */ +int linenoiseHistorySetMaxLen(int len) { + char **new_ptr; + + if (len < 1) return 0; + if (history) { + int tocopy = history_len; + + new_ptr = (char**) malloc(sizeof(char*)*len); + if (new_ptr == NULL) return 0; + + /* If we can't copy everything, free the elements we'll not use. */ + if (len < tocopy) { + int j; + + for (j = 0; j < tocopy-len; j++) free(history[j]); + tocopy = len; + } + memset(new_ptr,0,sizeof(char*)*len); + memcpy(new_ptr,history+(history_len-tocopy), sizeof(char*)*tocopy); + free(history); + history = new_ptr; + } + history_max_len = len; + if (history_len > history_max_len) + history_len = history_max_len; + return 1; +} + +/* Save the history in the specified file. On success 0 is returned + * otherwise -1 is returned. 
*/ +int linenoiseHistorySave(const char *filename) { + mode_t old_umask = umask(S_IXUSR|S_IRWXG|S_IRWXO); + FILE *fp; + int j; + + fp = fopen(filename,"w"); + umask(old_umask); + if (fp == NULL) return -1; + chmod(filename,S_IRUSR|S_IWUSR); + for (j = 0; j < history_len; j++) + fprintf(fp,"%s\n",history[j]); + fclose(fp); + return 0; +} + +/* Load the history from the specified file. If the file does not exist + * zero is returned and no operation is performed. + * + * If the file exists and the operation succeeded 0 is returned, otherwise + * on error -1 is returned. */ +int linenoiseHistoryLoad(const char *filename) { + FILE *fp = fopen(filename,"r"); + char buf[LINENOISE_MAX_LINE]; + + if (fp == NULL) return -1; + + while (fgets(buf,LINENOISE_MAX_LINE,fp) != NULL) { + char *p; + + p = strchr(buf,'\r'); + if (!p) p = strchr(buf,'\n'); + if (p) *p = '\0'; + linenoiseHistoryAdd(buf); + } + fclose(fp); + return 0; +} +#endif diff --git a/examples/run/linenoise.cpp/linenoise.h b/examples/run/linenoise.cpp/linenoise.h new file mode 100644 index 000000000..3e25f4de3 --- /dev/null +++ b/examples/run/linenoise.cpp/linenoise.h @@ -0,0 +1,114 @@ +/* linenoise.h -- VERSION 1.0 + * + * Guerrilla line editing library against the idea that a line editing lib + * needs to be 20,000 lines of C++ code. + * + * See linenoise.cpp for more information. + * + * ------------------------------------------------------------------------ + * + * Copyright (c) 2010-2023, Salvatore Sanfilippo + * Copyright (c) 2010-2013, Pieter Noordhuis + * Copyright (c) 2025, Eric Curtin + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __LINENOISE_H +#define __LINENOISE_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include /* For size_t. */ + +extern const char *linenoiseEditMore; + +/* The linenoiseState structure represents the state during line editing. + * We pass this state to functions implementing specific editing + * functionalities. */ +struct linenoiseState { + int in_completion; /* The user pressed TAB and we are now in completion + * mode, so input is handled by completeLine(). */ + size_t completion_idx; /* Index of next completion to propose. */ + int ifd; /* Terminal stdin file descriptor. */ + int ofd; /* Terminal stdout file descriptor. */ + char *buf; /* Edited line buffer. */ + size_t buflen; /* Edited line buffer size. */ + const char *prompt; /* Prompt to display. */ + size_t plen; /* Prompt length. */ + size_t pos; /* Current cursor position. */ + size_t oldpos; /* Previous refresh cursor position. */ + size_t len; /* Current edited line length. 
*/ + size_t cols; /* Number of columns in terminal. */ + size_t oldrows; /* Rows used by last refreshed line (multiline mode) */ + int history_index; /* The history index we are currently editing. */ +}; + +typedef struct linenoiseCompletions { + size_t len; + char **cvec; +} linenoiseCompletions; + +/* Non blocking API. */ +int linenoiseEditStart(struct linenoiseState *l, int stdin_fd, int stdout_fd, char *buf, size_t buflen, const char *prompt); +const char *linenoiseEditFeed(struct linenoiseState *l); +void linenoiseEditStop(struct linenoiseState *l); +void linenoiseHide(struct linenoiseState *l); +void linenoiseShow(struct linenoiseState *l); + +/* Blocking API. */ +const char *linenoise(const char *prompt); +void linenoiseFree(void *ptr); + +/* Completion API. */ +typedef void(linenoiseCompletionCallback)(const char *, linenoiseCompletions *); +typedef const char*(linenoiseHintsCallback)(const char *, int *color, int *bold); +typedef void(linenoiseFreeHintsCallback)(const char *); +void linenoiseSetCompletionCallback(linenoiseCompletionCallback *); +void linenoiseSetHintsCallback(linenoiseHintsCallback *); +void linenoiseSetFreeHintsCallback(linenoiseFreeHintsCallback *); +void linenoiseAddCompletion(linenoiseCompletions *, const char *); + +/* History API. */ +int linenoiseHistoryAdd(const char *line); +int linenoiseHistorySetMaxLen(int len); +int linenoiseHistorySave(const char *filename); +int linenoiseHistoryLoad(const char *filename); + +/* Other utilities. 
*/ +void linenoiseClearScreen(void); +void linenoiseSetMultiLine(int ml); +void linenoisePrintKeyCodes(void); +void linenoiseMaskModeEnable(void); +void linenoiseMaskModeDisable(void); + +#ifdef __cplusplus +} +#endif + +#endif /* __LINENOISE_H */ diff --git a/examples/run/run.cpp b/examples/run/run.cpp index 0ad8bb15b..dd9ea79e8 100644 --- a/examples/run/run.cpp +++ b/examples/run/run.cpp @@ -19,12 +19,14 @@ #include #include #include +#include #include #include #include #include "common.h" #include "json.hpp" +#include "linenoise.cpp/linenoise.h" #include "llama-cpp.h" #if defined(__unix__) || (defined(__APPLE__) && defined(__MACH__)) || defined(_WIN32) @@ -536,7 +538,7 @@ class LlamaData { llama_sampler_ptr sampler; llama_context_ptr context; std::vector messages; - std::vector msg_strs; + std::list msg_strs; std::vector fmtted; int init(Opt & opt) { @@ -807,24 +809,44 @@ static int generate(LlamaData & llama_data, const std::string & prompt, std::str batch = llama_batch_get_one(&new_token_id, 1); } + printf("\033[0m"); return 0; } -static int read_user_input(std::string & user) { - std::getline(std::cin, user); +static int read_user_input(std::string & user_input) { + static const char * prompt_prefix = "> "; +#ifdef WIN32 + printf( + "\r%*s" + "\r\033[0m%s", + get_terminal_width(), " ", prompt_prefix); + + std::getline(std::cin, user_input); if (std::cin.eof()) { printf("\n"); return 1; } - - if (user == "/bye") { +#else + std::unique_ptr line(const_cast(linenoise(prompt_prefix)), free); + if (!line) { return 1; } - if (user.empty()) { + user_input = line.get(); +#endif + + if (user_input == "/bye") { + return 1; + } + + if (user_input.empty()) { return 2; } +#ifndef WIN32 + linenoiseHistoryAdd(line.get()); +#endif + return 0; // Should have data in happy path } @@ -865,10 +887,6 @@ static int handle_user_input(std::string & user_input, const std::string & user) return 0; // No need for interactive input } - printf( - "\r%*s" - "\r\033[32m> \033[0m", - 
get_terminal_width(), " "); return read_user_input(user_input); // Returns true if input ends the loop } From 99487b57d47e14dc342b7b89d238ca11c0345241 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicol=C3=B2=20Scipione?= Date: Sun, 19 Jan 2025 14:33:34 +0100 Subject: [PATCH 7/9] SYCL: Introducing memory host pool (#11251) * Implement host pool for matrix_info Creating a new memory pool on the host to store memory location for matrix_info needed to launch gemm_batch from oneMKL/oneMath. Removing complex support in gemm_batch since it is not used in llama.cpp * Remove unnecessary headers and cast * Reorder member variable to avoid warning on initialization * Formatting * Remove unused variable * Address PR review feedback - remove warning --------- Signed-off-by: nscipione --- ggml/src/ggml-sycl/common.hpp | 13 +++ ggml/src/ggml-sycl/dpct/helper.hpp | 135 +++++++++-------------------- ggml/src/ggml-sycl/ggml-sycl.cpp | 92 ++++++++++++++++++-- 3 files changed, 137 insertions(+), 103 deletions(-) diff --git a/ggml/src/ggml-sycl/common.hpp b/ggml/src/ggml-sycl/common.hpp index e9500f3a1..abad847ca 100644 --- a/ggml/src/ggml-sycl/common.hpp +++ b/ggml/src/ggml-sycl/common.hpp @@ -333,8 +333,12 @@ struct ggml_backend_sycl_context { // pool std::unique_ptr pools[GGML_SYCL_MAX_DEVICES]; + std::unique_ptr host_pools[GGML_SYCL_MAX_DEVICES]; + static std::unique_ptr new_pool_for_device(queue_ptr qptr, int device); + static std::unique_ptr new_pool_for_host(queue_ptr qptr, int device); + ggml_sycl_pool & pool(int device) { if (pools[device] == nullptr) { pools[device] = new_pool_for_device(stream(device,0), device); @@ -345,6 +349,15 @@ struct ggml_backend_sycl_context { ggml_sycl_pool & pool() { return pool(device); } + + ggml_sycl_pool & host_pool(int device) { + if (host_pools[device] == nullptr) { + host_pools[device] = new_pool_for_host(stream(device, 0), device); + } + return *host_pools[device]; + } + + ggml_sycl_pool & host_pool() { return host_pool(device); } }; // common 
device functions diff --git a/ggml/src/ggml-sycl/dpct/helper.hpp b/ggml/src/ggml-sycl/dpct/helper.hpp index e167948e7..c96395be6 100644 --- a/ggml/src/ggml-sycl/dpct/helper.hpp +++ b/ggml/src/ggml-sycl/dpct/helper.hpp @@ -82,6 +82,14 @@ inline std::string get_device_backend_and_type(const sycl::device &device) { return device_type.str(); } +template struct matrix_info_t { + oneapi::mkl::transpose transpose_info[2]; + Ts value_info[2]; + std::int64_t size_info[3]; + std::int64_t ld_info[3]; + std::int64_t groupsize_info; +}; + namespace dpct { typedef sycl::queue *queue_ptr; @@ -1727,26 +1735,13 @@ namespace dpct }; template - inline void gemm_batch_impl(sycl::queue &q, oneapi::mkl::transpose a_trans, - oneapi::mkl::transpose b_trans, int m, int n, int k, - const void *alpha, const void **a, int lda, - const void **b, int ldb, const void *beta, void **c, - int ldc, int batch_size) - { - struct matrix_info_t - { - oneapi::mkl::transpose transpose_info[2]; - Ts value_info[2]; - std::int64_t size_info[3]; - std::int64_t ld_info[3]; - std::int64_t groupsize_info; - }; - + inline void gemm_batch_impl(sycl::queue & q, oneapi::mkl::transpose a_trans, oneapi::mkl::transpose b_trans, + int m, int n, int k, const void * alpha, const void ** a, int lda, const void ** b, + int ldb, const void * beta, void ** c, int ldc, int batch_size, + matrix_info_t * matrix_info) { Ts alpha_value = dpct::get_value(reinterpret_cast(alpha), q); Ts beta_value = dpct::get_value(reinterpret_cast(beta), q); - matrix_info_t *matrix_info = - (matrix_info_t *)std::malloc(sizeof(matrix_info_t)); matrix_info->transpose_info[0] = a_trans; matrix_info->transpose_info[1] = b_trans; matrix_info->value_info[0] = alpha_value; @@ -1763,23 +1758,18 @@ namespace dpct sycl::event e = oneapi::mkl::blas::column_major::gemm_batch( oneapi::mkl::backend_selector{ q }, matrix_info->transpose_info, matrix_info->transpose_info + 1, matrix_info->size_info, matrix_info->size_info + 1, - matrix_info->size_info + 2, 
matrix_info->value_info, reinterpret_cast(a), - matrix_info->ld_info, reinterpret_cast(b), matrix_info->ld_info + 1, - matrix_info->value_info + 1, reinterpret_cast(c), matrix_info->ld_info + 2, 1, - &(matrix_info->groupsize_info)); + matrix_info->size_info + 2, reinterpret_cast(matrix_info->value_info), + reinterpret_cast(a), matrix_info->ld_info, reinterpret_cast(b), + matrix_info->ld_info + 1, reinterpret_cast(matrix_info->value_info + 1), + reinterpret_cast(c), matrix_info->ld_info + 2, 1, &(matrix_info->groupsize_info)); #else sycl::event e = oneapi::mkl::blas::column_major::gemm_batch( q, matrix_info->transpose_info, matrix_info->transpose_info + 1, matrix_info->size_info, - matrix_info->size_info + 1, matrix_info->size_info + 2, matrix_info->value_info, + matrix_info->size_info + 1, matrix_info->size_info + 2, reinterpret_cast(matrix_info->value_info), reinterpret_cast(a), matrix_info->ld_info, reinterpret_cast(b), - matrix_info->ld_info + 1, matrix_info->value_info + 1, reinterpret_cast(c), - matrix_info->ld_info + 2, 1, &(matrix_info->groupsize_info)); + matrix_info->ld_info + 1, reinterpret_cast(matrix_info->value_info + 1), + reinterpret_cast(c), matrix_info->ld_info + 2, 1, &(matrix_info->groupsize_info)); #endif - - q.submit([&](sycl::handler &cgh) - { - cgh.depends_on(e); - cgh.host_task([=] { std::free(matrix_info); }); }); } template @@ -2422,25 +2412,11 @@ namespace dpct /// \param [in] ldc Leading dimension of C. /// \param [in] batch_size Specifies the number of matrix multiply operations to perform. /// \param [in] scaling_type Data type of the scaling factors. 
- inline void gemm_batch(sycl::queue &q, oneapi::mkl::transpose a_trans, - oneapi::mkl::transpose b_trans, int m, int n, int k, - const void *alpha, const void *a[], - library_data_t a_type, int lda, const void *b[], - library_data_t b_type, int ldb, const void *beta, - void *c[], library_data_t c_type, int ldc, - int batch_size, library_data_t scaling_type) - { - if (scaling_type == library_data_t::real_float && - c_type == library_data_t::complex_float) - { - scaling_type = library_data_t::complex_float; - } - else if (scaling_type == library_data_t::real_double && - c_type == library_data_t::complex_double) - { - scaling_type = library_data_t::complex_double; - } - + inline void gemm_batch(sycl::queue & q, oneapi::mkl::transpose a_trans, oneapi::mkl::transpose b_trans, int m, + int n, int k, const void * alpha, const void * a[], library_data_t a_type, int lda, + const void * b[], library_data_t b_type, int ldb, const void * beta, void * c[], + library_data_t c_type, int ldc, int batch_size, library_data_t scaling_type, + matrix_info_t * matrix_info) { std::uint64_t key = detail::get_type_combination_id(a_type, b_type, c_type, scaling_type); switch (key) @@ -2449,48 +2425,24 @@ namespace dpct library_data_t::real_float, library_data_t::real_float, library_data_t::real_float, library_data_t::real_float): { - detail::gemm_batch_impl( - q, a_trans, b_trans, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, - batch_size); + detail::gemm_batch_impl(q, a_trans, b_trans, m, n, k, alpha, a, lda, b, ldb, + beta, c, ldc, batch_size, matrix_info); break; } case detail::get_type_combination_id( library_data_t::real_double, library_data_t::real_double, library_data_t::real_double, library_data_t::real_double): { - detail::gemm_batch_impl( - q, a_trans, b_trans, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, - batch_size); - break; - } - case detail::get_type_combination_id( - library_data_t::complex_float, library_data_t::complex_float, - library_data_t::complex_float, 
library_data_t::complex_float): - { - detail::gemm_batch_impl, std::complex, - std::complex, std::complex>( - q, a_trans, b_trans, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, - batch_size); - break; - } - case detail::get_type_combination_id( - library_data_t::complex_double, library_data_t::complex_double, - library_data_t::complex_double, library_data_t::complex_double): - { - detail::gemm_batch_impl, std::complex, - std::complex, std::complex>( - q, a_trans, b_trans, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, - batch_size); + detail::gemm_batch_impl(q, a_trans, b_trans, m, n, k, alpha, a, lda, b, ldb, + beta, c, ldc, batch_size, matrix_info); break; } case detail::get_type_combination_id( library_data_t::real_half, library_data_t::real_half, library_data_t::real_half, library_data_t::real_half): { - detail::gemm_batch_impl(q, a_trans, b_trans, m, n, k, alpha, - a, lda, b, ldb, beta, c, ldc, - batch_size); + detail::gemm_batch_impl( + q, a_trans, b_trans, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, batch_size, matrix_info); break; } #ifdef __INTEL_MKL__ @@ -2498,19 +2450,16 @@ namespace dpct library_data_t::real_bfloat16, library_data_t::real_bfloat16, library_data_t::real_bfloat16, library_data_t::real_float): { - detail::gemm_batch_impl( - q, a_trans, b_trans, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, - batch_size); + detail::gemm_batch_impl( + q, a_trans, b_trans, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, batch_size, matrix_info); break; } case detail::get_type_combination_id( library_data_t::real_bfloat16, library_data_t::real_bfloat16, library_data_t::real_float, library_data_t::real_float): { - detail::gemm_batch_impl(q, a_trans, b_trans, m, n, k, alpha, a, lda, - b, ldb, beta, c, ldc, batch_size); + detail::gemm_batch_impl( + q, a_trans, b_trans, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, batch_size, matrix_info); break; } #endif @@ -2522,10 +2471,9 @@ namespace dpct dpct::get_value(reinterpret_cast(alpha), q); float beta_float = 
dpct::get_value(reinterpret_cast(beta), q); - detail::gemm_batch_impl(q, a_trans, b_trans, m, n, k, &alpha_float, - a, lda, b, ldb, &beta_float, c, ldc, - batch_size); + detail::gemm_batch_impl( + q, a_trans, b_trans, m, n, k, &alpha_float, a, lda, b, ldb, &beta_float, c, ldc, batch_size, + matrix_info); break; } case detail::get_type_combination_id( @@ -2533,8 +2481,7 @@ namespace dpct library_data_t::real_float, library_data_t::real_float): { detail::gemm_batch_impl( - q, a_trans, b_trans, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, - batch_size); + q, a_trans, b_trans, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, batch_size, matrix_info); break; } case detail::get_type_combination_id( @@ -2542,8 +2489,7 @@ namespace dpct library_data_t::real_float, library_data_t::real_float): { detail::gemm_batch_impl( - q, a_trans, b_trans, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, - batch_size); + q, a_trans, b_trans, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, batch_size, matrix_info); break; } case detail::get_type_combination_id( @@ -2557,8 +2503,7 @@ namespace dpct sycl::half alpha_half(alpha_value); sycl::half beta_half(beta_value); detail::gemm_batch_impl( - q, a_trans, b_trans, m, n, k, &alpha_half, a, lda, b, ldb, &beta_half, c, ldc, - batch_size); + q, a_trans, b_trans, m, n, k, &alpha_half, a, lda, b, ldb, &beta_half, c, ldc, batch_size, matrix_info); break; } default: diff --git a/ggml/src/ggml-sycl/ggml-sycl.cpp b/ggml/src/ggml-sycl/ggml-sycl.cpp index 5272ca454..ed4d8bb8b 100644 --- a/ggml/src/ggml-sycl/ggml-sycl.cpp +++ b/ggml/src/ggml-sycl/ggml-sycl.cpp @@ -1173,6 +1173,85 @@ struct ggml_sycl_pool_leg : public ggml_sycl_pool { } }; +struct ggml_sycl_pool_host : public ggml_sycl_pool { + queue_ptr qptr; + int device; + + inline static int counter{ 0 }; + + struct ggml_sycl_buffer { + void * ptr = nullptr; + size_t size = 0; + }; + + // Set arbitrarily to 64 + static constexpr int MAX_POOL_SIZE{ 64 }; + std::vector buffer_pool = 
std::vector(MAX_POOL_SIZE); + size_t pool_size = 0; + + explicit ggml_sycl_pool_host(queue_ptr qptr_, int device_) : qptr(qptr_), device(device_) {} + + ~ggml_sycl_pool_host() { + for (int i = 0; i < MAX_POOL_SIZE; ++i) { + ggml_sycl_buffer & b = buffer_pool[i]; + if (b.ptr != nullptr) { + SYCL_CHECK(CHECK_TRY_ERROR(sycl::free(b.ptr, *qptr))); + b.ptr = nullptr; + pool_size -= b.size; + b.size = 0; + } + } + counter = 0; + } + + void * alloc(size_t size, size_t * actual_size) override { + if (counter == MAX_POOL_SIZE) { + ggml_sycl_buffer b = buffer_pool[0]; + void * ptr = b.ptr; + *actual_size = b.size; + counter = 1; + return ptr; + } + ggml_sycl_buffer & b = buffer_pool[counter]; + + if (b.ptr == nullptr) { + void * ptr; + + SYCL_CHECK(CHECK_TRY_ERROR(ptr = (void *) sycl::malloc_host(size, *qptr))); + if (!ptr) { + GGML_LOG_ERROR("%s: can't allocate %lu Bytes of memory on host\n", __func__, size); + return nullptr; + } + pool_size += size; + *actual_size = size; + counter = counter + 1; + return ptr; + } else { + ++counter; + b.size = size; + return b.ptr; + } + } + + void free(void * ptr, size_t size) override { + // if the pool is not completed add the pointer to it in place of the first nullptr found. + // Otherwise do nothing, pointers will be freed once the pool is deallocated. 
+ for (int i = 0; i < MAX_POOL_SIZE; ++i) { + ggml_sycl_buffer & b = buffer_pool[i]; + if (b.ptr == nullptr) { + b.ptr = ptr; + b.size = size; + return; + } + } + } +}; + +std::unique_ptr ggml_backend_sycl_context::new_pool_for_host(queue_ptr qptr, int device) { + // return pool for the host to speed up memory management + return std::unique_ptr(new ggml_sycl_pool_host(qptr, device)); +} + std::unique_ptr ggml_backend_sycl_context::new_pool_for_device(queue_ptr qptr, int device) { // TBD: NO VMM support // if (ggml_sycl_info().devices[device].vmm) { @@ -3363,6 +3442,7 @@ static void ggml_sycl_mul_mat_batched_sycl(ggml_backend_sycl_context & ctx, ggml_sycl_pool_alloc ptrs_src(ctx.pool(), 2*ne23); ggml_sycl_pool_alloc< void *> ptrs_dst(ctx.pool(), 1*ne23); + ggml_sycl_pool_alloc> matrix_info(ctx.host_pool(), 1); sycl::range<3> block_dims(1, ne12, ne13); /* @@ -3391,14 +3471,10 @@ static void ggml_sycl_mul_mat_batched_sycl(ggml_backend_sycl_context & ctx, }); } SYCL_CHECK(CHECK_TRY_ERROR(dpct::gemm_batch( - *main_stream, oneapi::mkl::transpose::trans, - oneapi::mkl::transpose::nontrans, ne01, ne11, ne10, alpha, - (const void **)(ptrs_src.get() + 0 * ne23), - dpct::library_data_t::real_half, nb01 / nb00, - (const void **)(ptrs_src.get() + 1 * ne23), - dpct::library_data_t::real_half, nb11 / nb10, beta, - (void **)(ptrs_dst.get() + 0 * ne23), cu_data_type, ne01, ne23, - cu_compute_type))); + *main_stream, oneapi::mkl::transpose::trans, oneapi::mkl::transpose::nontrans, ne01, ne11, ne10, alpha, + (const void **) (ptrs_src.get() + 0 * ne23), dpct::library_data_t::real_half, nb01 / nb00, + (const void **) (ptrs_src.get() + 1 * ne23), dpct::library_data_t::real_half, nb11 / nb10, beta, + (void **) (ptrs_dst.get() + 0 * ne23), cu_data_type, ne01, ne23, cu_compute_type, matrix_info.get()))); } } catch (sycl::exception const &exc) { From b9daaffe02d6a77d85f0420bce5dfe0e00daeff6 Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Sun, 19 Jan 2025 18:12:09 +0200 Subject: [PATCH 
8/9] simple-chat : fix BOS being added to each message (#11278) --- examples/simple-chat/simple-chat.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/examples/simple-chat/simple-chat.cpp b/examples/simple-chat/simple-chat.cpp index e8eda9c22..26422601d 100644 --- a/examples/simple-chat/simple-chat.cpp +++ b/examples/simple-chat/simple-chat.cpp @@ -95,11 +95,11 @@ int main(int argc, char ** argv) { llama_sampler_chain_add(smpl, llama_sampler_init_dist(LLAMA_DEFAULT_SEED)); // helper function to evaluate a prompt and generate a response - auto generate = [&](const std::string & prompt) { + auto generate = [&](const std::string & prompt, bool is_first) { std::string response; // tokenize the prompt - const int n_prompt_tokens = -llama_tokenize(vocab, prompt.c_str(), prompt.size(), NULL, 0, true, true); + const int n_prompt_tokens = -llama_tokenize(vocab, prompt.c_str(), prompt.size(), NULL, 0, is_first, true); std::vector prompt_tokens(n_prompt_tokens); if (llama_tokenize(vocab, prompt.c_str(), prompt.size(), prompt_tokens.data(), prompt_tokens.size(), llama_get_kv_cache_used_cells(ctx) == 0, true) < 0) { GGML_ABORT("failed to tokenize the prompt\n"); @@ -180,7 +180,7 @@ int main(int argc, char ** argv) { // generate a response printf("\033[33m"); - std::string response = generate(prompt); + std::string response = generate(prompt, prev_len == 0); printf("\n\033[0m"); // add the response to the messages From 92bc493917d43b83e592349e138b54c90b1c3ea7 Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Sun, 19 Jan 2025 20:22:30 +0200 Subject: [PATCH 9/9] tests : increase timeout when sanitizers are enabled (#11300) * tests : increase timeout when sanitizers are enabled * tests : add DEFAULT_HTTP_TIMEOUT --- .github/workflows/server.yml | 25 ++++++++++++++++++++++--- examples/server/tests/utils.py | 5 ++++- 2 files changed, 26 insertions(+), 4 deletions(-) diff --git a/.github/workflows/server.yml b/.github/workflows/server.yml index 
671fe595c..ed1c357a5 100644 --- a/.github/workflows/server.yml +++ b/.github/workflows/server.yml @@ -112,9 +112,9 @@ jobs: -DGGML_OPENMP=OFF ; cmake --build build --config ${{ matrix.build_type }} -j $(nproc) --target llama-server - - name: Build - id: cmake_build - if: ${{ matrix.sanitizer != 'THREAD' }} + - name: Build (sanitizers) + id: cmake_build_sanitizers + if: ${{ matrix.sanitizer != '' && matrix.sanitizer != 'THREAD' }} run: | cmake -B build \ -DGGML_NATIVE=OFF \ @@ -124,12 +124,31 @@ jobs: -DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON ; cmake --build build --config ${{ matrix.build_type }} -j $(nproc) --target llama-server + - name: Build (sanitizers) + id: cmake_build + if: ${{ matrix.sanitizer == '' }} + run: | + cmake -B build \ + -DGGML_NATIVE=OFF \ + -DLLAMA_BUILD_SERVER=ON \ + -DLLAMA_CURL=ON \ + -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} ; + cmake --build build --config ${{ matrix.build_type }} -j $(nproc) --target llama-server + - name: Tests id: server_integration_tests + if: ${{ matrix.sanitizer == '' }} run: | cd examples/server/tests ./tests.sh + - name: Tests (sanitizers) + id: server_integration_tests_sanitizers + if: ${{ matrix.sanitizer != '' }} + run: | + cd examples/server/tests + LLAMA_SANITIZE=1 ./tests.sh + - name: Slow tests id: server_integration_tests_slow if: ${{ (github.event.schedule || github.event.inputs.slow_tests == 'true') && matrix.build_type == 'Release' }} diff --git a/examples/server/tests/utils.py b/examples/server/tests/utils.py index 73be4c92f..9d1a7a5b0 100644 --- a/examples/server/tests/utils.py +++ b/examples/server/tests/utils.py @@ -26,6 +26,9 @@ from re import RegexFlag import wget +DEFAULT_HTTP_TIMEOUT = 10 if "LLAMA_SANITIZE" not in os.environ else 30 + + class ServerResponse: headers: dict status_code: int @@ -88,7 +91,7 @@ class ServerProcess: if "PORT" in os.environ: self.server_port = int(os.environ["PORT"]) - def start(self, timeout_seconds: int = 10) -> None: + def start(self, timeout_seconds: int | 
None = DEFAULT_HTTP_TIMEOUT) -> None: if "LLAMA_SERVER_BIN_PATH" in os.environ: server_path = os.environ["LLAMA_SERVER_BIN_PATH"] elif os.name == "nt":