drivers: udc_dwc2: Optimize endpoint interrupt handling
SEGGER Ozone J-Trace Code Profile identified iterations over the daint value as a hot path. The iterations show up at the very top of the code profile because a full iteration happens whenever there is any activity on an endpoint. Optimize the daint handling loops so only set bits are iterated over. While this optimization depends on find_lsb_set() efficiency, it seems to be worth it solely on the basis that quite often only a few bits are set. After a bit deeper analysis, I was surprised that on ARM Cortex-M33 the find_lsb_set() approach is faster than naive iteration even if all bits are set (which is an extreme case because USB applications are unlikely to use all 16 IN and 16 OUT endpoints simultaneously). This is due to the fact that there is only one conditional jump, CBNZ; find_lsb_set() - 1 translates to RBIT + CLZ, and clearing the bit uses LSL.W + BIC.W. The naive iteration, in contrast, uses ADDS + CMP + BNE for the loop handling and also has LSR.W + LSLS + BPL (+ an ADD.W instruction on each iteration to add 16 for OUT endpoints) for the continue check. Therefore the optimized code on ARM Cortex-M33 is never worse than naive iteration. Signed-off-by: Tomasz Moń <tomasz.mon@nordicsemi.no>
This commit is contained in:
parent
050b8a915c
commit
8b4c53e05a
@ -780,6 +780,14 @@ USB_DWC2_GET_FIELD_DEFINE(dsts_enumspd, DSTS_ENUMSPD)
|
||||
|
||||
/* Device all endpoints interrupt registers */
|
||||
#define USB_DWC2_DAINT 0x0818UL
|
||||
#define USB_DWC2_DAINT_OUTEPINT_POS 16UL
|
||||
#define USB_DWC2_DAINT_OUTEPINT_MASK (0xFFFFUL << USB_DWC2_DAINT_OUTEPINT_POS)
|
||||
#define USB_DWC2_DAINT_INEPINT_POS 0UL
|
||||
#define USB_DWC2_DAINT_INEPINT_MASK (0xFFFFUL << USB_DWC2_DAINT_INEPINT_POS)
|
||||
|
||||
USB_DWC2_GET_FIELD_DEFINE(daint_outepint, DAINT_OUTEPINT)
|
||||
USB_DWC2_GET_FIELD_DEFINE(daint_inepint, DAINT_INEPINT)
|
||||
|
||||
#define USB_DWC2_DAINTMSK 0x081CUL
|
||||
/*
 * Bit mask for OUT endpoint ep_num in the DAINT register layout
 * (OUT endpoint bits start at bit 16). The argument is parenthesized so
 * that expressions such as (ep & 0xF) are not split by operator precedence.
 */
#define USB_DWC2_DAINT_OUTEPINT(ep_num) BIT(16UL + (ep_num))
|
||||
#define USB_DWC2_DAINT_INEPINT(ep_num) BIT(ep_num)
|
||||
|
||||
@ -2570,32 +2570,31 @@ static inline void dwc2_handle_in_xfercompl(const struct device *dev,
|
||||
static inline void dwc2_handle_iepint(const struct device *dev)
|
||||
{
|
||||
struct usb_dwc2_reg *const base = dwc2_get_base(dev);
|
||||
const uint8_t n_max = 16;
|
||||
uint32_t diepmsk;
|
||||
uint32_t daint;
|
||||
uint32_t epint;
|
||||
|
||||
diepmsk = sys_read32((mem_addr_t)&base->diepmsk);
|
||||
daint = sys_read32((mem_addr_t)&base->daint);
|
||||
epint = usb_dwc2_get_daint_inepint(sys_read32((mem_addr_t)&base->daint));
|
||||
|
||||
for (uint8_t n = 0U; n < n_max; n++) {
|
||||
while (epint) {
|
||||
uint8_t n = find_lsb_set(epint) - 1;
|
||||
mem_addr_t diepint_reg = (mem_addr_t)&base->in_ep[n].diepint;
|
||||
uint32_t diepint;
|
||||
uint32_t status;
|
||||
|
||||
if (daint & USB_DWC2_DAINT_INEPINT(n)) {
|
||||
/* Read and clear interrupt status */
|
||||
diepint = sys_read32(diepint_reg);
|
||||
status = diepint & diepmsk;
|
||||
sys_write32(status, diepint_reg);
|
||||
/* Read and clear interrupt status */
|
||||
diepint = sys_read32(diepint_reg);
|
||||
status = diepint & diepmsk;
|
||||
sys_write32(status, diepint_reg);
|
||||
|
||||
LOG_DBG("ep 0x%02x interrupt status: 0x%x",
|
||||
n | USB_EP_DIR_IN, status);
|
||||
|
||||
if (status & USB_DWC2_DIEPINT_XFERCOMPL) {
|
||||
dwc2_handle_in_xfercompl(dev, n);
|
||||
}
|
||||
LOG_DBG("ep 0x%02x interrupt status: 0x%x",
|
||||
n | USB_EP_DIR_IN, status);
|
||||
|
||||
if (status & USB_DWC2_DIEPINT_XFERCOMPL) {
|
||||
dwc2_handle_in_xfercompl(dev, n);
|
||||
}
|
||||
|
||||
epint &= ~BIT(n);
|
||||
}
|
||||
|
||||
/* Clear IEPINT interrupt */
|
||||
@ -2678,22 +2677,18 @@ static inline void dwc2_handle_oepint(const struct device *dev)
|
||||
{
|
||||
struct usb_dwc2_reg *const base = dwc2_get_base(dev);
|
||||
struct udc_dwc2_data *const priv = udc_get_private(dev);
|
||||
const uint8_t n_max = 16;
|
||||
uint32_t doepmsk;
|
||||
uint32_t daint;
|
||||
uint32_t epint;
|
||||
|
||||
doepmsk = sys_read32((mem_addr_t)&base->doepmsk);
|
||||
daint = sys_read32((mem_addr_t)&base->daint);
|
||||
epint = usb_dwc2_get_daint_outepint(sys_read32((mem_addr_t)&base->daint));
|
||||
|
||||
for (uint8_t n = 0U; n < n_max; n++) {
|
||||
while (epint) {
|
||||
uint8_t n = find_lsb_set(epint) - 1;
|
||||
mem_addr_t doepint_reg = (mem_addr_t)&base->out_ep[n].doepint;
|
||||
uint32_t doepint;
|
||||
uint32_t status;
|
||||
|
||||
if (!(daint & USB_DWC2_DAINT_OUTEPINT(n))) {
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Read and clear interrupt status */
|
||||
doepint = sys_read32(doepint_reg);
|
||||
status = doepint & doepmsk;
|
||||
@ -2739,6 +2734,8 @@ static inline void dwc2_handle_oepint(const struct device *dev)
|
||||
if (status & USB_DWC2_DOEPINT_XFERCOMPL) {
|
||||
dwc2_handle_out_xfercompl(dev, n);
|
||||
}
|
||||
|
||||
epint &= ~BIT(n);
|
||||
}
|
||||
|
||||
/* Clear OEPINT interrupt */
|
||||
|
||||
Loading…
Reference in New Issue
Block a user