From 35e313f9e5ff7044ccd9ca2950c10e03c19a0ed0 Mon Sep 17 00:00:00 2001
From: Dmitrii Golovanov
Date: Wed, 29 May 2024 16:57:35 +0200
Subject: [PATCH] twister: recording: Allow JSON data fields

Extend the Twister Harness recording feature to allow selected data
fields, extracted from the log by a regular expression, to be parsed
into JSON objects and eventually reported in `twister.json` as the
`recording` list property of the test suite. With this extension,
log records can convey layered data structures passed from a test
image as summary results, traces, statistics, etc.

This extension also allows a flexible recording structure: a test
image can output different types of data records encapsulated into a
fixed set of fields, so the `recording.csv` file columns are
respected, while some of the columns keep strings with JSON-encoded
semi-structured data.

Signed-off-by: Dmitrii Golovanov
---
 doc/develop/test/twister.rst                  | 45 +++++++++++++++++--
 scripts/pylib/twister/twisterlib/harness.py   | 20 ++++++++-
 scripts/schemas/twister/testsuite-schema.yaml |  5 +++
 scripts/tests/twister/test_harness.py         | 33 ++++++++++----
 4 files changed, 91 insertions(+), 12 deletions(-)

diff --git a/doc/develop/test/twister.rst b/doc/develop/test/twister.rst
index 4773a3e3466..99a4e546e62 100644
--- a/doc/develop/test/twister.rst
+++ b/doc/develop/test/twister.rst
@@ -504,16 +504,55 @@ harness_config:
       The regular expression with named subgroups to match data fields
       at the test's output lines where the test provides some custom data
       for further analysis. These records will be written into the build
-      directory 'recording.csv' file as well as 'recording' property
-      of the test suite object in 'twister.json'.
+      directory ``recording.csv`` file as well as ``recording`` property
+      of the test suite object in ``twister.json``.
 
-      For example, to extract three data fields 'metric', 'cycles', 'nanoseconds':
+      For example, to extract three data fields ``metric``, ``cycles``,
+      ``nanoseconds``:
 
       .. code-block:: yaml
 
         record:
           regex: "(?P<metric>.*):(?P<cycles>.*) cycles, (?P<nanoseconds>.*) ns"
 
+    as_json: (optional)
+      Data fields, extracted by the regular expression into named subgroups,
+      which will be additionally parsed as JSON encoded strings and written
+      into ``twister.json`` as nested ``recording`` object properties.
+      The corresponding ``recording.csv`` columns will contain strings as-is.
+
+      Using this option, a test log can convey layered data structures
+      passed from the test image for further analysis with summary results,
+      traces, statistics, etc.
+
+      For example, this configuration:
+
+      .. code-block:: yaml
+
+        record:
+          regex: "RECORD:(?P<type>.*):DATA:(?P<metrics>.*)"
+          as_json: [metrics]
+
+      when matched to a test log string:
+
+      .. code-block:: none
+
+        RECORD:jitter_drift:DATA:{"rollovers":0, "mean_us":1000.0}
+
+      will be reported in ``twister.json`` as:
+
+      .. code-block:: json
+
+        "recording":[
+            {
+              "type":"jitter_drift",
+              "metrics":{
+                "rollovers":0,
+                "mean_us":1000.0
+              }
+            }
+        ]
+
   fixture:
      Specify a test case dependency on an external device(e.g., sensor),
      and identify setups that fulfill this dependency. It depends on
diff --git a/scripts/pylib/twister/twisterlib/harness.py b/scripts/pylib/twister/twisterlib/harness.py
index 6108703c22f..1518cdbd79e 100644
--- a/scripts/pylib/twister/twisterlib/harness.py
+++ b/scripts/pylib/twister/twisterlib/harness.py
@@ -13,6 +13,7 @@ import logging
 import threading
 import time
 import shutil
+import json
 
 from twisterlib.error import ConfigurationError
 from twisterlib.environment import ZEPHYR_BASE, PYTEST_PLUGIN_INSTALLED
@@ -57,6 +58,7 @@ class Harness:
         self.next_pattern = 0
         self.record = None
         self.record_pattern = None
+        self.record_as_json = None
         self.recording = []
         self.ztest = False
         self.detected_suite_names = []
@@ -82,6 +84,7 @@
         self.record = config.get('record', {})
         if self.record:
             self.record_pattern = re.compile(self.record.get("regex", ""))
+            self.record_as_json = self.record.get("as_json")
 
     def build(self):
         pass
@@ -92,12 +95,27 @@
         """
         return self.id
 
+    def translate_record(self, record: dict) -> dict:
+        if self.record_as_json:
+            for k in self.record_as_json:
+                if not k in record:
+                    continue
+                try:
+                    record[k] = json.loads(record[k]) if record[k] else {}
+                except json.JSONDecodeError as parse_error:
+                    logger.warning(f"HARNESS:{self.__class__.__name__}: recording JSON failed:"
+                                   f" {parse_error} for '{k}':'{record[k]}'")
+                    # Don't set the Harness state to failed for recordings.
+                    record[k] = { 'ERROR': { 'msg': str(parse_error), 'doc': record[k] } }
+        return record
+
     def parse_record(self, line) -> re.Match:
         match = None
         if self.record_pattern:
             match = self.record_pattern.search(line)
             if match:
-                self.recording.append({ k:v.strip() for k,v in match.groupdict(default="").items() })
+                rec = self.translate_record({ k:v.strip() for k,v in match.groupdict(default="").items() })
+                self.recording.append(rec)
         return match
 #
diff --git a/scripts/schemas/twister/testsuite-schema.yaml b/scripts/schemas/twister/testsuite-schema.yaml
index 1eae7dcef1b..28b7e730edb 100644
--- a/scripts/schemas/twister/testsuite-schema.yaml
+++ b/scripts/schemas/twister/testsuite-schema.yaml
@@ -130,6 +130,11 @@ schema;scenario-schema:
               "regex":
                 type: str
                 required: true
+              "as_json":
+                type: seq
+                required: false
+                sequence:
+                  - type: str
           "bsim_exe_name":
             type: str
             required: false
diff --git a/scripts/tests/twister/test_harness.py b/scripts/tests/twister/test_harness.py
index ddebb2c5b2e..2e0028b234a 100644
--- a/scripts/tests/twister/test_harness.py
+++ b/scripts/tests/twister/test_harness.py
@@ -45,23 +45,40 @@ def process_logs(harness, logs):
 
 
 TEST_DATA_RECORDING = [
-    ([''], "^START:(?P<foo>.*):END", []),
-    (['START:bar:STOP'], "^START:(?P<foo>.*):END", []),
-    (['START:bar:END'], "^START:(?P<foo>.*):END", [{'foo':'bar'}]),
-    (['START:bar:baz:END'], "^START:(?P<foo>.*):(?P<boo>.*):END", [{'foo':'bar', 'boo':'baz'}]),
+    ([''], "^START:(?P<foo>.*):END", [], None),
+    (['START:bar:STOP'], "^START:(?P<foo>.*):END", [], None),
+    (['START:bar:END'], "^START:(?P<foo>.*):END", [{'foo':'bar'}], None),
+    (['START:bar:baz:END'], "^START:(?P<foo>.*):(?P<boo>.*):END", [{'foo':'bar', 'boo':'baz'}], None),
     (['START:bar:baz:END','START:may:jun:END'], "^START:(?P<foo>.*):(?P<boo>.*):END",
-     [{'foo':'bar', 'boo':'baz'}, {'foo':'may', 'boo':'jun'}]),
+     [{'foo':'bar', 'boo':'baz'}, {'foo':'may', 'boo':'jun'}], None),
+    (['START:bar:END'], "^START:(?P<foo>.*):END", [{'foo':'bar'}], []),
+    (['START:bar:END'], "^START:(?P<foo>.*):END", [{'foo':'bar'}], ['boo']),
+    (['START:bad_json:END'], "^START:(?P<foo>.*):END",
+     [{'foo':{'ERROR':{'msg':'Expecting value: line 1 column 1 (char 0)', 'doc':'bad_json'}}}], ['foo']),
+    (['START::END'], "^START:(?P<foo>.*):END", [{'foo':{}}], ['foo']),
"^START:(?P.*):END", [{'foo':{}}], ['foo']), + (['START: {"one":1, "two":2} :END'], "^START:(?P.*):END", [{'foo':{'one':1, 'two':2}}], ['foo']), + (['START: {"one":1, "two":2} :STOP:oops:END'], "^START:(?P.*):STOP:(?P.*):END", + [{'foo':{'one':1, 'two':2},'boo':'oops'}], ['foo']), + (['START: {"one":1, "two":2} :STOP:{"oops":0}:END'], "^START:(?P.*):STOP:(?P.*):END", + [{'foo':{'one':1, 'two':2},'boo':{'oops':0}}], ['foo','boo']), ] @pytest.mark.parametrize( - "lines, pattern, expected_records", + "lines, pattern, expected_records, as_json", TEST_DATA_RECORDING, - ids=["empty", "no match", "match 1 field", "match 2 fields", "match 2 records"] + ids=["empty", "no match", "match 1 field", "match 2 fields", "match 2 records", + "as_json empty", "as_json no such field", "error parsing json", "empty json value", "simple json", + "plain field and json field", "two json fields" + ] ) -def test_harness_parse_record(lines, pattern, expected_records): +def test_harness_parse_record(lines, pattern, expected_records, as_json): harness = Harness() harness.record = { 'regex': pattern } harness.record_pattern = re.compile(pattern) + harness.record_as_json = as_json + if as_json is not None: + harness.record['as_json'] = as_json + assert not harness.recording for line in lines: