Files
SC-F001/logtool/parser.py
2026-04-09 07:41:15 -05:00

466 lines
17 KiB
Python

"""
Binary log parser for SC-F001 flash logs.
On-disk entry format: [len u8][payload (len-1 bytes)][type u8] = len+1 total bytes
The firmware does len++ before writing, so stored len = payload_size + 1.
All values are little-endian.
"""
import json
import re
import struct
from datetime import datetime, timezone
from pathlib import Path
# Entry type codes. Types 0-12 double as FSM state snapshots (the type
# value is the state id); values >= 100 are standalone event records.
LOG_TYPE_BAT = 100  # periodic battery-voltage sample
LOG_TYPE_CRASH = 101  # crash record carrying the reset reason
LOG_TYPE_BOOT = 102  # boot record: packed reset reason + wake cause
LOG_TYPE_TIME_SET = 103  # wall-clock time was (re)set

# Fallback FSM state map (matches control_fsm.h)
# Used when load_fsm_states() cannot find or parse the firmware header.
_FALLBACK_FSM_STATES = {
    0: "IDLE",
    1: "MOVE_START_DELAY",
    2: "JACK_UP_START",
    3: "JACK_UP",
    4: "DRIVE_START_DELAY",
    5: "DRIVE",
    6: "DRIVE_END_DELAY",
    7: "JACK_DOWN",
    8: "UNDO_JACK_START",
    9: "CALIBRATE_JACK_DELAY",
    10: "CALIBRATE_JACK_MOVE",
    11: "CALIBRATE_DRIVE_DELAY",
    12: "CALIBRATE_DRIVE_MOVE",
}

# Reset reasons — ordering matches ESP-IDF's esp_reset_reason_t
# (7 abbreviates ESP_RST_WDT, "other watchdogs").
ESP_RESET_REASONS = {
    0: "UNKNOWN",
    1: "POWERON",
    2: "EXT",
    3: "SW",
    4: "PANIC",
    5: "INT_WDT",
    6: "TASK_WDT",
    7: "WDT",
    8: "DEEPSLEEP",
    9: "BROWNOUT",
    10: "SDIO",
}
def load_fsm_states(fw_path=None) -> dict:
    """
    Parse FSM state names from control_fsm.h.

    Args:
        fw_path: Directory containing control_fsm.h. Defaults to the
            sibling ../main directory relative to this file.

    Returns:
        dict mapping int state value -> display name (e.g. {0: 'IDLE', ...}).
        Falls back to the hardcoded map if the file can't be found or parsed.
    """
    if fw_path is None:
        # Default: sibling directory ../main relative to this file
        fw_path = Path(__file__).parent.parent / "main"
    header = Path(fw_path) / "control_fsm.h"
    if not header.exists():
        return dict(_FALLBACK_FSM_STATES)
    try:
        text = header.read_text()
        # Find the fsm_state_t enum block
        m = re.search(r'typedef\s+enum\s*\{([^}]+)\}\s*fsm_state_t\s*;', text, re.DOTALL)
        if not m:
            return dict(_FALLBACK_FSM_STATES)
        states = {}
        value = 0
        for line in m.group(1).splitlines():
            # BUGFIX: strip the inline // comment FIRST, then the trailing
            # comma. The old order ("rstrip(',') then split('//')") left
            # fragments like "0," or "STATE_X," behind whenever a comment
            # followed the comma (e.g. "STATE_X = 3, // note").
            line = line.split('//')[0].strip().rstrip(',').strip()
            if not line:
                continue  # blank or comment-only line
            if '=' in line:
                name, val = line.split('=', 1)
                name = name.strip()
                val = val.strip()
                try:
                    value = int(val, 0)  # base 0 accepts decimal and 0x.. forms
                except ValueError:
                    # Non-literal initializer: keep the running counter.
                    pass
            else:
                name = line
            if name:
                # Strip STATE_ prefix for display brevity
                display = name.removeprefix('STATE_') if hasattr(str, 'removeprefix') else (
                    name[6:] if name.startswith('STATE_') else name)
                states[value] = display
                value += 1
        return states if states else dict(_FALLBACK_FSM_STATES)
    except Exception:
        # Any parse hiccup → fall back to the hardcoded map; never raise.
        return dict(_FALLBACK_FSM_STATES)
def _ts_to_str(ts_ms: int) -> str:
"""Convert ms-since-epoch (local-as-UTC) to display string."""
try:
dt = datetime.utcfromtimestamp(ts_ms / 1000.0)
return dt.strftime("%Y-%m-%d %H:%M:%S.") + f"{ts_ms % 1000:03d}"
except (OSError, ValueError):
return str(ts_ms)
def _unpack_fsm(payload: bytes, fsm_states: dict) -> dict:
    """Decode an FSM state-snapshot payload.

    Little-endian layout: u64 ts_ms, f32 bat_V, f32 drive_A, f32 jack_A,
    f32 aux_A, i16 counter, u8 sensors (low nibble = stable, high = raw),
    then optional f32 drive_heat (>=31 bytes) and f32 jack_heat + f32
    aux_heat (>=39 bytes). `fsm_states` is accepted for interface
    compatibility but not consulted here.

    Raises ValueError if the payload is shorter than the 27-byte core.
    """
    if len(payload) < 27:
        raise ValueError(f"FSM payload too short: {len(payload)} < 27")
    fields = struct.unpack_from('<QffffhB', payload, 0)
    stamp = fields[0]
    volts, amp_drive, amp_jack, amp_aux = fields[1:5]
    counter, sensor_bits = fields[5], fields[6]
    # Heat fields were appended by later firmware; default to 0.0 for
    # entries that predate them.
    heat = [0.0, 0.0, 0.0]  # drive, jack, aux
    if len(payload) >= 31:
        heat[0] = struct.unpack_from('<f', payload, 27)[0]
    if len(payload) >= 39:
        heat[1], heat[2] = struct.unpack_from('<ff', payload, 31)
    return {
        'ts_ms': stamp,
        'time_str': _ts_to_str(stamp),
        'bat_V': round(volts, 3),
        'drive_A': round(amp_drive, 3),
        'jack_A': round(amp_jack, 3),
        'aux_A': round(amp_aux, 3),
        'counter': counter,
        'sensors_stable': sensor_bits & 0x0F,
        'sensors_raw': (sensor_bits >> 4) & 0x0F,
        'drive_heat': round(heat[0], 2),
        'jack_heat': round(heat[1], 2),
        'aux_heat': round(heat[2], 2),
    }
def _unpack_bat(payload: bytes) -> dict:
    """Decode a battery-sample payload: u64 ts_ms + f32 bat_V (little-endian).

    Raises ValueError if fewer than 12 bytes are supplied.
    """
    if len(payload) < 12:
        raise ValueError(f"BAT payload too short: {len(payload)} < 12")
    stamp, volts = struct.unpack_from('<Qf', payload, 0)
    return {
        'ts_ms': stamp,
        'time_str': _ts_to_str(stamp),
        'bat_V': round(volts, 3),
    }
def _unpack_crash(payload: bytes) -> dict:
    """Decode a crash record: u64 ts_ms + u8 reset reason (little-endian).

    Raises ValueError if fewer than 9 bytes are supplied.
    """
    if len(payload) < 9:
        raise ValueError(f"CRASH payload too short: {len(payload)} < 9")
    stamp, code = struct.unpack_from('<QB', payload, 0)
    reason = ESP_RESET_REASONS.get(code, f"UNKNOWN({code})")
    return {
        'ts_ms': stamp,
        'time_str': _ts_to_str(stamp),
        'reset_reason': code,
        'reason_str': reason,
    }
# Wake-up causes, mirroring ESP-IDF's esp_sleep_wakeup_cause_t.
# 0 = ESP_SLEEP_WAKEUP_UNDEFINED, which this project labels 'NORMAL'
# (boot was not a deep-sleep wake).
# BUGFIX: 5 and 6 were swapped and 7 duplicated 'ULP'; per ESP-IDF,
# 5 = TOUCHPAD, 6 = ULP, 7 = GPIO (light sleep), 8 = UART.
# NOTE(review): assumes the firmware stores raw esp_sleep_wakeup_cause_t
# values — confirm against the firmware's boot-logging code.
ESP_WAKEUP_CAUSES = {
    0: 'NORMAL',
    1: 'ALL',
    2: 'EXT0',
    3: 'EXT1',
    4: 'TIMER',
    5: 'TOUCHPAD',
    6: 'ULP',
    7: 'GPIO',
    8: 'UART',
}
def _unpack_boot(payload: bytes) -> dict:
    """Decode a boot record: u64 ts_ms + u8 packed boot info (little-endian).

    The packed byte carries the reset reason in its low nibble and the
    wake-up cause in its high nibble.

    Raises ValueError if fewer than 9 bytes are supplied.
    """
    if len(payload) < 9:
        raise ValueError(f"BOOT payload too short: {len(payload)} < 9")
    stamp, packed = struct.unpack_from('<QB', payload, 0)
    reason = packed & 0x0F
    wake = (packed >> 4) & 0x0F
    return {
        'ts_ms': stamp,
        'time_str': _ts_to_str(stamp),
        'reset_reason': reason,
        'reason_str': ESP_RESET_REASONS.get(reason, f"UNKNOWN({reason})"),
        'wake_cause': wake,
        'wake_str': ESP_WAKEUP_CAUSES.get(wake, f"UNKNOWN({wake})"),
    }
def _unpack_time_set(payload: bytes) -> dict:
    """Decode a TIME_SET record: just the u64 ms timestamp (little-endian).

    Raises ValueError if fewer than 8 bytes are supplied.
    """
    if len(payload) < 8:
        raise ValueError(f"TIME_SET payload too short: {len(payload)} < 8")
    stamp = struct.unpack_from('<Q', payload, 0)[0]
    return {
        'ts_ms': stamp,
        'time_str': _ts_to_str(stamp),
    }
def _is_valid_entry_type(t: int) -> bool:
return (0 <= t <= 12) or t in (LOG_TYPE_BAT, LOG_TYPE_CRASH, LOG_TYPE_BOOT, LOG_TYPE_TIME_SET)
def parse_entries(data: bytes, fsm_states: dict = None, type_first: bool = False) -> list:
    """
    Parse a stream of raw binary log entries.

    Entry format depends on type_first:
        False (current FW): [len u8][payload (len-1 bytes)][type u8]
        True  (old FW):     [len u8][type u8][payload (len-1 bytes)]
    In both cases total bytes consumed per entry = len + 1 (the firmware
    does len++ before writing, so stored len = payload_size + 1).

    Args:
        data: Raw flash bytes; may contain 0xFF (erased) and 0x00 padding.
        fsm_states: int -> display-name map; defaults to _FALLBACK_FSM_STATES.
        type_first: Use the old-firmware layout (type byte before payload).

    Returns:
        List of dicts, each with 'entry_type', 'state_name' and type-specific
        fields. Entries that fail to decode become 'PARSE_ERR' dicts (with
        'error' and 'raw') instead of raising.
    """
    if fsm_states is None:
        fsm_states = _FALLBACK_FSM_STATES
    entries = []
    i = 0
    n = len(data)
    while i < n:
        b = data[i]
        # Erased flash (0xFF) or sector padding (0x00) → skip to next sector.
        # BUGFIX: in type_first (old FW) format, sectors start with a small
        # zero-pad header that is NOT full-sector padding, so 0x00 must only
        # skip one byte there (branch below). Previously 0x00 always took the
        # whole-sector skip, leaving the single-byte branch unreachable.
        if b == 0xFF or (b == 0x00 and not type_first):
            sector_size = 4096
            next_sector = ((i // sector_size) + 1) * sector_size
            i = next_sector
            continue
        # Old-FW zero-pad header: skip individual zero bytes only.
        if type_first and b == 0x00:
            i += 1
            continue
        entry_len = b  # stored len = payload_size + 1
        payload_size = entry_len - 1
        end_offset = i + entry_len  # last byte of this entry's content
        if end_offset >= n:
            break  # truncated entry at end of data
        # Detect entry format: with type byte (total = len+1) or without (total = len).
        # Check if data[end_offset] is the start of the next entry (no type byte)
        # vs a type byte followed by the next entry at end_offset+1.
        has_type_byte = True
        if end_offset + 1 < n:
            next_at_len = data[end_offset]  # byte right after payload
            next_at_len1 = data[end_offset + 1]  # byte one further
            # If the byte at end_offset looks like a valid next-entry len byte
            # (matches current entry len or is another plausible len), and the
            # byte at end_offset+1 does NOT, then there's no type byte.
            next_ok = next_at_len not in (0x00, 0xFF) and next_at_len < 250
            next1_ok = next_at_len1 not in (0x00, 0xFF) and next_at_len1 < 250
            if next_ok and not _is_valid_entry_type(next_at_len):
                # end_offset byte isn't a valid type, treat as next entry (no type)
                has_type_byte = False
            elif next_ok and next_at_len == entry_len and not next1_ok:
                # Same len repeating at stride=len (not len+1) → no type byte
                has_type_byte = False
        if not has_type_byte:
            # No type byte: [len][payload], total = len bytes, FSM type implied
            payload = data[i + 1 : i + entry_len]
            entry_type = 0  # default to IDLE / FSM
            i = end_offset  # advance by len (not len+1)
        elif type_first:
            entry_type = data[i + 1]
            payload = data[i + 2 : i + 1 + entry_len]
            # Fallback: if type-first gives invalid type, try type-last
            if not _is_valid_entry_type(entry_type):
                alt_type = data[end_offset]
                if _is_valid_entry_type(alt_type):
                    entry_type = alt_type
                    payload = data[i + 1 : i + 1 + payload_size]
            i = end_offset + 1
        else:
            payload = data[i + 1 : i + 1 + payload_size]
            entry_type = data[end_offset]
            # Fallback: if type-last gives invalid type, try type-first
            if not _is_valid_entry_type(entry_type):
                alt_type = data[i + 1]
                if _is_valid_entry_type(alt_type):
                    entry_type = alt_type
                    payload = data[i + 2 : i + 1 + entry_len]
            i = end_offset + 1
        try:
            if 0 <= entry_type <= 12:
                e = _unpack_fsm(payload, fsm_states)
                e['entry_type'] = entry_type
                e['state_name'] = fsm_states.get(entry_type, f"STATE_{entry_type}")
            elif entry_type == LOG_TYPE_BAT:
                e = _unpack_bat(payload)
                e['entry_type'] = LOG_TYPE_BAT
                e['state_name'] = 'BAT'
            elif entry_type == LOG_TYPE_CRASH:
                e = _unpack_crash(payload)
                e['entry_type'] = LOG_TYPE_CRASH
                e['state_name'] = 'CRASH'
            elif entry_type == LOG_TYPE_BOOT:
                e = _unpack_boot(payload)
                e['entry_type'] = LOG_TYPE_BOOT
                e['state_name'] = 'BOOT'
            elif entry_type == LOG_TYPE_TIME_SET:
                e = _unpack_time_set(payload)
                e['entry_type'] = LOG_TYPE_TIME_SET
                e['state_name'] = 'TIME_SET'
            else:
                # Unknown type — keep the raw bytes so nothing is lost.
                e = {
                    'entry_type': entry_type,
                    'state_name': f'UNK({entry_type:#04x})',
                    'raw': payload.hex(),
                }
        except Exception as exc:
            # Corrupt entry: record the failure and keep parsing.
            e = {
                'entry_type': entry_type,
                'state_name': 'PARSE_ERR',
                'error': str(exc),
                'raw': payload.hex(),
            }
        entries.append(e)
        # i was already advanced in the format-detection block above
    return entries
def parse_response(blob: bytes, fsm_states: dict = None) -> tuple:
    """
    Parse a full HTTP /log response blob.

    Layout: [4B BE json_len][JSON meta][4B BE tail][4B BE head][raw log data].

    Returns (json_meta: dict, tail: int, head: int, entries: list).
    Raises ValueError for short, HTML, or malformed responses.
    """
    if len(blob) < 8:
        raise ValueError("Response too short")
    # Device sometimes serves its web UI instead of the binary log.
    html_markers = (b'<!DOC', b'<!doc', b'<html', b'<HTML')
    if blob[:5] in html_markers:
        raise ValueError("Got HTML instead of binary log — check URL resolves to /log endpoint")
    json_len = struct.unpack_from('>I', blob, 0)[0]
    if json_len > 65536 or len(blob) < 4 + json_len + 8:
        raise ValueError(f"Invalid json_len {json_len} (expected binary log format, got {blob[:20]})")
    meta = json.loads(blob[4 : 4 + json_len].decode('utf-8'))
    tail, head = struct.unpack_from('>II', blob, 4 + json_len)
    entries = parse_entries(blob[4 + json_len + 8:], fsm_states)
    return meta, tail, head, entries
def _detect_old_partition_dump(blob: bytes) -> int:
"""
Detect old firmware partition dump format.
Old format: 8-byte file header + 0x4000 bytes params + log entries
with type byte at the start of each entry's content region.
Returns the log data start offset, or 0 if not detected.
"""
if len(blob) < 0x4100:
return 0
# Check if offset 0x4000 looks like a log sector: leading zero-pad
# followed by a valid entry with a valid type byte at +1 (type-first format)
base = 0x4000
# Find first non-zero byte in the sector
first_nz = 0
while first_nz < 4096 and blob[base + first_nz] == 0x00:
first_nz += 1
if first_nz >= 4096:
return 0
entry_len = blob[base + first_nz]
if entry_len < 2 or base + first_nz + 1 + entry_len > len(blob):
return 0
# In old format, the type byte is the first byte after the len byte
entry_type = blob[base + first_nz + 1]
if _is_valid_entry_type(entry_type):
return base
return 0
def _try_detect_type_first(data: bytes) -> bool:
    """
    Given raw log entry data, try to determine if entries use
    type-first format (old FW) vs type-last format (current FW).

    Samples up to 200 entries. For each, checks which byte placement
    (type at i+1 vs type at the end of the entry) yields a valid entry
    type; when both look valid, the payload is probed for a plausible
    millisecond timestamp (2020-2030) or a reasonable battery voltage
    (0.5-60 V) to disambiguate.

    Returns True for type-first (old FW), False for type-last.
    Defaults to False when nothing conclusive is found.
    """
    i = 0
    n = len(data)
    attempts = 0
    max_attempts = 200
    while i < n and attempts < max_attempts:
        b = data[i]
        if b == 0xFF:
            # Erased flash — stop sampling.
            break
        if b == 0x00:
            # Zero padding — jump to the next 4 KiB sector boundary.
            i = ((i // 4096) + 1) * 4096
            continue
        entry_len = b
        end_offset = i + entry_len
        if end_offset >= n:
            break  # truncated entry
        # type-last (current): type is at end_offset
        type_last = data[end_offset]
        # type-first (old): type is at i+1
        type_first_val = data[i + 1]
        last_valid = _is_valid_entry_type(type_last)
        first_valid = _is_valid_entry_type(type_first_val)
        if first_valid and not last_valid:
            return True
        if last_valid and not first_valid:
            return False
        # Both valid or neither — try parsing the payload to disambiguate
        if first_valid and last_valid:
            # Candidate payloads under each layout (both payload_size bytes).
            payload_first = data[i + 2 : i + 1 + entry_len]
            payload_last = data[i + 1 : i + 1 + entry_len - 1]
            for payload, is_first in [(payload_first, True), (payload_last, False)]:
                if len(payload) >= 12:
                    ts = struct.unpack_from('<Q', payload, 0)[0]
                    # Plausible if timestamp is 2020-2030 in ms
                    if 1577836800000 < ts < 1893456000000:
                        return is_first
                    # Also check if the float at offset 8 is a reasonable voltage (0-60V)
                    v = struct.unpack_from('<f', payload, 8)[0]
                    if 0.5 < v < 60.0:
                        return is_first
        # Advance to next entry and keep trying
        # (both layouts consume len + 1 bytes total).
        i = end_offset + 1
        attempts += 1
    return False
def autodetect_and_parse(blob: bytes, fsm_states: dict = None) -> tuple:
    """
    Auto-detect the blob format and parse it.

    Formats are tried in order:
      1. HTTP /log response: [4B BE json_len]['{'...JSON...][tail][head][data]
      2. Bare ring pointers:  [4B BE tail][4B BE head][raw log data]
      3. Old partition dump:  8B header + 0x4000 params + type-first entries
      4. Raw flash binary (type placement auto-detected)

    Returns (json_meta_or_None, tail_or_None, head_or_None, entries).
    """
    # 1) HTTP format: byte 4 must open the JSON metadata object.
    if len(blob) >= 5:
        maybe_len = struct.unpack_from('>I', blob, 0)[0]
        if maybe_len < len(blob) and blob[4:5] == b'{':
            return parse_response(blob, fsm_states)
    # 2) Bare tail+head: pointer span must match the payload size exactly.
    if len(blob) >= 16:
        tail_ptr, head_ptr = struct.unpack_from('>II', blob, 0)
        if head_ptr > tail_ptr and (head_ptr - tail_ptr) == len(blob) - 8:
            payload = blob[8:]
            layout = _try_detect_type_first(payload)
            return None, tail_ptr, head_ptr, parse_entries(payload, fsm_states, type_first=layout)
    # 3) Old partition dump (type-first entries after the params region).
    start = _detect_old_partition_dump(blob)
    if start > 0:
        payload = blob[start:]
        layout = _try_detect_type_first(payload)
        return None, None, None, parse_entries(payload, fsm_states, type_first=layout)
    # 4) Raw binary — auto-detect type placement.
    layout = _try_detect_type_first(blob)
    return None, None, None, parse_entries(blob, fsm_states, type_first=layout)