import os
import re
# Words whose accidental repetition is removed by filter_repeated_words().
# Matching is case-sensitive, so 'I' is listed only in upper case.
common_words = {
    'about', 'after', 'all', 'also', 'an', 'and', 'any', 'are', 'as', 'at',
    'be', 'because', 'but', 'by',
    'can', 'come', 'could',
    'day', 'do',
    'even',
    'first', 'for',
    'get', 'give', 'go',
    'has', 'have', 'he', 'her', 'him', 'his', 'how',
    'I', 'in', 'into', 'it', 'its',
    'just',
    'know',
    'like', 'look',
    'make', 'man', 'many', 'me', 'more', 'my',
    'new', 'no', 'not', 'now',
    'of', 'one', 'only', 'or', 'other', 'our', 'out', 'over',
    'people',
    'say', 'see', 'she', 'so', 'some',
    'take', 'tell', 'than', 'their', 'them', 'then', 'there', 'these',
    'they', 'think', 'this', 'time', 'two',
    'up', 'use',
    'very',
    'want', 'was', 'way', 'we', 'well', 'what', 'when', 'which', 'who',
    'will', 'with', 'would',
    'year', 'you', 'your',
}
# File-name suffixes that process_directory() accepts.  The suffix is the
# text after the last '.' of a file name, or the whole name when it has
# no '.', which is why a bare name such as 'Kconfig' appears here.
valid_extensions = {
    'c', 'h', 'ld',
    'dts', 'dtsi', 'overlay',
    'cmake', 'Kconfig', 'defconfig', 'conf', 'cfg', 'soc',
    'yaml', 'yml', 'txt', 'rst',
    'sh', 'py',
}
def filter_repeated_words(text, words=None):
    """Remove accidentally doubled words from *text*.

    Two situations are handled:

    * A word repeated on the same line, separated only by blanks
      ("for for" -> "for").  A run of any length collapses to one word.
    * A word that ends one line and is repeated as the first word of the
      next line, optionally after indentation and comment leaders made of
      ``*`` and ``/``.  The first occurrence, the line break and the
      leader are kept; the second occurrence and the blanks directly
      after it are dropped.

    Matching is case-sensitive and applies to whole words only.

    Args:
        text: The text to clean.  Line endings are left exactly as given.
        words: Iterable of words to look for.  Defaults to the module-level
            ``common_words`` set.

    Returns:
        The cleaned text.  *text* is returned unchanged when it is empty or
        when there are no words to look for.
    """
    if words is None:
        words = common_words
    # Sorted so the generated pattern does not depend on set ordering.
    alternatives = '|'.join(re.escape(word) for word in sorted(words))
    if not text or not alternatives:
        # An empty alternation would match the empty string everywhere.
        return text

    word = r'\b(' + alternatives + r')\b'
    # Any whitespace except a line break, so that the same-line rule can
    # never reach across a newline.
    blank = r'[^\S\n]'

    # Same line: the word followed by one or more blank-separated repeats.
    same_line = re.compile(word + r'(?:' + blank + r'+\b\1\b)+')
    text = same_line.sub(r'\1', text)

    # Next line: a real line break is required between the two words.
    # Without it, expressions such as "two * two" or "and/and" would be
    # treated as duplicates and damaged.  Working on the text directly
    # (instead of joining lines with a marker string) also means no input
    # can collide with a marker.
    next_line = re.compile(
        word
        + r'(' + blank + r'*\n(?:[*/]|' + blank + r')*)'
        + r'\b\1\b' + blank + r'*'
    )
    return next_line.sub(r'\1\2', text)
def process_file(file_path):
    """Strip repeated words from the file at *file_path*, in place.

    The file is read as UTF-8, passed through ``filter_repeated_words`` and
    written back only when the text actually changed.

    Args:
        file_path: Path of the file to rewrite.

    Raises:
        OSError: If the file cannot be opened, read or written.
        UnicodeDecodeError: If the file content is not valid UTF-8.
    """
    # newline='' turns off newline translation, so a file using CRLF line
    # endings keeps them instead of being rewritten with the platform
    # default.
    with open(file_path, 'r', encoding='utf-8', newline='') as file:
        text = file.read()
    new_text = filter_repeated_words(text)
    if new_text == text:
        # Nothing to fix: do not rewrite the file.
        return
    with open(file_path, 'w', encoding='utf-8', newline='') as file:
        file.write(new_text)
def process_directory(directory_path):
    """Walk *directory_path* and run ``process_file`` on each eligible file.

    Directories whose name starts with "." are not descended into, and
    files whose name starts with "." are skipped.  A file is eligible when
    the text after its last "." (or its whole name when it has no ".") is
    in ``valid_extensions``.  Each eligible path is printed and then
    processed.

    Args:
        directory_path: Root directory of the walk.
    """
    for root, dirs, files in os.walk(directory_path):
        # Slice-assign so the list os.walk holds is pruned in place.
        dirs[:] = [name for name in dirs if not name.startswith('.')]
        for file_name in files:
            if file_name.startswith('.'):
                # Hidden file.
                continue
            suffix = file_name.rsplit('.', 1)[-1]
            # Only handle files with one of the listed suffixes.
            if suffix not in valid_extensions:
                continue
            file_path = os.path.join(root, file_name)
            print(f"Processed file: {file_path}")
            process_file(file_path)
# Root of the tree to clean up.  The path is hard-coded, and the walk below
# starts as soon as this module is executed.
directory_to_process = "/home/mi/works/github/zephyrproject/zephyr"
process_directory(directory_path=directory_to_process)
Signed-off-by: Lingao Meng <menglingao@xiaomi.com>
| | | |
|---|---|---|
| .. | ||
| CMakeLists.txt | ||
| Kconfig | ||
| Kconfig.altera | ||
| Kconfig.altera_jtag | ||
| Kconfig.apbuart | ||
| Kconfig.b91 | ||
| Kconfig.bcm2711 | ||
| Kconfig.bt | ||
| Kconfig.cc13xx_cc26xx | ||
| Kconfig.cc32xx | ||
| Kconfig.cdns | ||
| Kconfig.cmsdk_apb | ||
| Kconfig.efinix_sapphire | ||
| Kconfig.emul | ||
| Kconfig.ene | ||
| Kconfig.esp32 | ||
| Kconfig.gd32 | ||
| Kconfig.gecko | ||
| Kconfig.hostlink | ||
| Kconfig.ifx_cat1 | ||
| Kconfig.imx | ||
| Kconfig.intel_lw | ||
| Kconfig.it8xxx2 | ||
| Kconfig.leuart_gecko | ||
| Kconfig.litex | ||
| Kconfig.lpc11u6x | ||
| Kconfig.max32 | ||
| Kconfig.mcux | ||
| Kconfig.mcux_flexcomm | ||
| Kconfig.mcux_iuart | ||
| Kconfig.mcux_lpsci | ||
| Kconfig.mcux_lpuart | ||
| Kconfig.miv | ||
| Kconfig.msp432p4xx | ||
| Kconfig.native_posix | ||
| Kconfig.native_tty | ||
| Kconfig.neorv32 | ||
| Kconfig.npcx | ||
| Kconfig.nrfx | ||
| Kconfig.nrfx_uart_instance | ||
| Kconfig.ns16550 | ||
| Kconfig.numaker | ||
| Kconfig.numicro | ||
| Kconfig.nxp_s32 | ||
| Kconfig.opentitan | ||
| Kconfig.pl011 | ||
| Kconfig.psoc6 | ||
| Kconfig.ql_usbserialport_s3b | ||
| Kconfig.rcar | ||
| Kconfig.renesas_ra | ||
| Kconfig.rpi_pico | ||
| Kconfig.rtt | ||
| Kconfig.rv32m1_lpuart | ||
| Kconfig.rzt2m | ||
| Kconfig.sam0 | ||
| Kconfig.sedi | ||
| Kconfig.sifive | ||
| Kconfig.smartbond | ||
| Kconfig.stellaris | ||
| Kconfig.stm32 | ||
| Kconfig.test | ||
| Kconfig.uart_sam | ||
| Kconfig.usart_sam | ||
| Kconfig.xec | ||
| Kconfig.xen | ||
| Kconfig.xlnx | ||
| Kconfig.xmc4xxx | ||
| leuart_gecko.c | ||
| serial_esp32_usb.c | ||
| serial_test.c | ||
| uart_altera_jtag.c | ||
| uart_altera.c | ||
| uart_apbuart.c | ||
| uart_async_rx.c | ||
| uart_async_to_irq.c | ||
| uart_b91.c | ||
| uart_bcm2711.c | ||
| uart_bt.c | ||
| uart_cc13xx_cc26xx.c | ||
| uart_cc32xx.c | ||
| uart_cdns.c | ||
| uart_cdns.h | ||
| uart_cmsdk_apb.c | ||
| uart_efinix_sapphire.c | ||
| uart_emul.c | ||
| uart_ene_kb1200.c | ||
| uart_esp32.c | ||
| uart_gecko.c | ||
| uart_handlers.c | ||
| uart_hostlink.c | ||
| uart_hvc_xen_consoleio.c | ||
| uart_hvc_xen.c | ||
| uart_ifx_cat1.c | ||
| uart_imx.c | ||
| uart_intel_lw.c | ||
| uart_ite_it8xxx2.c | ||
| uart_liteuart.c | ||
| uart_lpc11u6x.c | ||
| uart_lpc11u6x.h | ||
| uart_max32.c | ||
| uart_mchp_xec.c | ||
| uart_mcux_flexcomm.c | ||
| uart_mcux_iuart.c | ||
| uart_mcux_lpsci.c | ||
| uart_mcux_lpuart.c | ||
| uart_mcux.c | ||
| uart_miv.c | ||
| uart_msp432p4xx.c | ||
| uart_native_ptty_bottom.c | ||
| uart_native_ptty_bottom.h | ||
| uart_native_ptty.c | ||
| uart_native_tty_bottom.c | ||
| uart_native_tty_bottom.h | ||
| uart_native_tty.c | ||
| uart_neorv32.c | ||
| uart_npcx.c | ||
| uart_nrfx_uart.c | ||
| uart_nrfx_uarte2.c | ||
| uart_nrfx_uarte.c | ||
| uart_ns16550.c | ||
| uart_numaker.c | ||
| uart_numicro.c | ||
| uart_nxp_s32_linflexd.c | ||
| uart_nxp_s32_linflexd.h | ||
| uart_opentitan.c | ||
| uart_pipe.c | ||
| uart_pl011_ambiq.h | ||
| uart_pl011_raspberrypi_pico.h | ||
| uart_pl011_registers.h | ||
| uart_pl011.c | ||
| uart_psoc6.c | ||
| uart_ql_usbserialport_s3b.c | ||
| uart_ql_usbserialport_s3b.h | ||
| uart_rcar.c | ||
| uart_renesas_ra.c | ||
| uart_rpi_pico_pio.c | ||
| uart_rtt.c | ||
| uart_rv32m1_lpuart.c | ||
| uart_rzt2m.c | ||
| uart_rzt2m.h | ||
| uart_sam0.c | ||
| uart_sam.c | ||
| uart_sedi.c | ||
| uart_sifive.c | ||
| uart_smartbond.c | ||
| uart_stellaris.c | ||
| uart_stm32.c | ||
| uart_stm32.h | ||
| uart_xlnx_ps.c | ||
| uart_xlnx_uartlite.c | ||
| uart_xmc4xxx.c | ||
| usart_gd32.c | ||
| usart_sam.c | ||