arm: Use one less instruction for prolog in slightly complex functions - lightning.git - Portable just-in-time compiler library

index : lightning.git
Portable just-in-time compiler library
summary refs log tree commit diff
diff options
context:
space:
mode:
author: pcpa <paulo.cesar.pereira.de.andrade@gmail.com> 2023-02-03 11:25:45 -0300
committer: pcpa <paulo.cesar.pereira.de.andrade@gmail.com> 2023-02-03 11:25:45 -0300
commit: 8c3d3b123661a4ccceb252c4494c68126bfdada0 (patch)
tree: e47441ea12e0e0f9ffbfc88ee3160adaa23ca5e2
parent: 28020887ac68fbd7437b2c481efe6c39cd0b01ce (diff)
download: lightning-8c3d3b123661a4ccceb252c4494c68126bfdada0.tar.gz
arm: Use one less instruction for prolog in slightly complex functions
Slightly complex functions are functions that need to save r0-r3 in the prolog because of calls to C functions. The optimization is only applied for hard float (regardless of ABI); it could be done for software float in most cases, but is left out to keep the code, already too complex, a bit simpler. The implementation still assumes there is no hardware division. Almost certainly modern ARM chips (armv7r) have it in hardware; otherwise, vfp/neon instructions could be used to do the division with double values. This could/should be changed. Currently only qemu is used to test different ABIs and setups.
Diffstat
-rw-r--r--lib/jit_arm-cpu.c 15
1 files changed, 12 insertions, 3 deletions
diff --git a/lib/jit_arm-cpu.c b/lib/jit_arm-cpu.c
index 6beae05..06e38ff 100644
--- a/lib/jit_arm-cpu.c
+++ b/lib/jit_arm-cpu.c
@@ -3911,6 +3911,9 @@ _prolog(jit_state_t *_jit, jit_node_t *node)
}
if (_jitc->function->need_frame)
mask |= (1 << _FP_REGNO) | (1 << _LR_REGNO);
+ if (!jit_swf_p() && _jitc->function->save_reg_args &&
+ !(_jitc->function->self.call & jit_call_varargs))
+ mask |= 0xf;
if (jit_thumb_p()) {
/* switch to thumb mode (better approach would be to
@@ -3921,13 +3924,15 @@ _prolog(jit_state_t *_jit, jit_node_t *node)
BX(_R12_REGNO);
if (!_jitc->thumb)
_jitc->thumb = _jit->pc.w;
- if (_jitc->function->save_reg_args)
+ if (jit_swf_p() || (_jitc->function->save_reg_args &&
+ (_jitc->function->self.call & jit_call_varargs)))
T2_PUSH(0xf);
if (mask)
T2_PUSH(mask);
}
else {
- if (_jitc->function->save_reg_args)
+ if (jit_swf_p() || (_jitc->function->save_reg_args &&
+ (_jitc->function->self.call & jit_call_varargs)))
PUSH(0xf);
if (mask)
PUSH(mask);
@@ -3970,13 +3975,17 @@ _epilog(jit_state_t *_jit, jit_node_t *node)
mask |= (1 << _FP_REGNO) | (1 << _LR_REGNO);
movr(_SP_REGNO, _FP_REGNO);
}
+ if (!jit_swf_p() && _jitc->function->save_reg_args &&
+ !(_jitc->function->self.call & jit_call_varargs))
+ addi(_SP_REGNO, _SP_REGNO, 16);
if (mask) {
if (jit_thumb_p())
T2_POP(mask);
else
POP(mask);
}
- if (_jitc->function->save_reg_args)
+ if (jit_swf_p() || (_jitc->function->save_reg_args &&
+ (_jitc->function->self.call & jit_call_varargs)))
addi(_SP_REGNO, _SP_REGNO, 16);
if (jit_thumb_p())
T1_BX(_LR_REGNO);
generated by cgit v1.2.3 (git 2.39.1) at 2025-09-15 18:37:31 +0000

AltStyle によって変換されたページ (->オリジナル) /