From 82c955ab7c65e68d9cff6a981f1b87b6f2fba5f4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Benjamin=20Cab=C3=A9?= <benjamin@zephyrproject.org>
Date: Wed, 2 Jul 2025 21:19:22 +0200
Subject: [PATCH] scripts: net: Remove dependency to lxml in
 enumerate_http_status.py
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Using the third party lxml library for the very simple parsing task this
script performs in unnecessary, so switch to Python's built-in
HTMLParser.

Signed-off-by: Benjamin Cabé <benjamin@zephyrproject.org>
---
 scripts/net/enumerate_http_status.py | 44 ++++++++++++++++++++--------
 1 file changed, 31 insertions(+), 13 deletions(-)

diff --git a/scripts/net/enumerate_http_status.py b/scripts/net/enumerate_http_status.py
index 231c01e6276..93317036cae 100755
--- a/scripts/net/enumerate_http_status.py
+++ b/scripts/net/enumerate_http_status.py
@@ -25,25 +25,43 @@ Usage:
 	HTTP_511_NETWORK_AUTHENTICATION_REQUIRED = 511, /**< Network Authentication Required */
 """
 
-from lxml import html
+from html.parser import HTMLParser
 import requests
 import re
 
+class HTTPStatusParser(HTMLParser):
+    def __init__(self):
+        super().__init__()
+        self.status_codes = {}
+        self.in_code_tag = False
+        self.current_data = ""
+
+    def handle_starttag(self, tag, attrs):
+        if tag == 'code':
+            self.in_code_tag = True
+            self.current_data = ""
+
+    def handle_endtag(self, tag):
+        if tag == 'code' and self.in_code_tag:
+            self.in_code_tag = False
+            if self.current_data.strip():
+                match = re.match(r'([0-9]{3}) ([a-zA-Z].*)', self.current_data.strip())
+                if match:
+                    code = int(match.group(1))
+                    description = match.group(2)
+                    self.status_codes[code] = description
+
+    def handle_data(self, data):
+        if self.in_code_tag:
+            self.current_data += data
+
 page = requests.get('https://developer.mozilla.org/en-US/docs/Web/HTTP/Status')
-tree = html.fromstring(page.content)
 
-codes = tree.xpath('//code/text()')
+parser = HTTPStatusParser()
+parser.feed(page.text)
 
-codes2 = {}
-for c in codes:
-    if re.match('[0-9][0-9][0-9] [a-zA-Z].*', c):
-        key = int(c[0:3])
-        val = c[4:]
-        codes2[key] = val
-
-keys = sorted(codes2.keys())
-for key in keys:
-    val = codes2[key]
+for key in sorted(parser.status_codes.keys()):
+    val = parser.status_codes[key]
     enum_head = 'HTTP'
     enum_body = f'{key}'
     enum_tail = val.upper().replace(' ', '_').replace("'", '').replace('-', '_')