#!/usr/bin/env python3
"""
Enhanced HTML/CSS/JS Minifier and Compressor

This script provides multiple levels of optimization for embedded web pages:

1. inline ``<script>`` bodies are minified with rjsmin/jsmin (when installed),
2. the whole document is minified with ``minify_html``,
3. the result is compressed with gzip (and Brotli, when installed),
4. the compressed bytes are written out as C/C++ header files.
"""

import gzip
import re
import sys
from pathlib import Path

# Core dependency. The import is guarded so that main() can fail with an
# install hint instead of a traceback, and so the pure helpers below remain
# importable without the native extension.
try:
    import minify_html
except ImportError:
    minify_html = None

# Brotli is optional: main() only emits the Brotli header when it is present.
try:
    import brotli
except ImportError:
    brotli = None

# Try to import optional JavaScript minifier
try:
    import jsmin
    HAS_JSMIN = True
except ImportError:
    HAS_JSMIN = False

# Try to import rjsmin (faster alternative)
try:
    import rjsmin
    HAS_RJSMIN = True
except ImportError:
    HAS_RJSMIN = False

# Only warn when no JS minifier at all is available; warning about a missing
# jsmin while rjsmin is installed would be misleading.
if not (HAS_JSMIN or HAS_RJSMIN):
    print("Warning: jsmin not installed. Install with: pip install jsmin --break-system-packages")

# Scripts whose stripped body is shorter than this are left untouched.
MIN_SCRIPT_LENGTH = 50

# Number of byte literals per row in generated C arrays.
BYTES_PER_ROW = 16

# Matches one <script ...>...</script> element. The opening and closing tags
# are captured verbatim so attributes (type, defer, nonce, ...) survive.
_SCRIPT_PATTERN = re.compile(
    r"(?P<open><script\b(?P<attrs>[^>]*)>)(?P<body>.*?)(?P<close></script\s*>)",
    re.DOTALL | re.IGNORECASE,
)

# Extracts the value of a script tag's ``type`` attribute (not ``data-type``).
_SCRIPT_TYPE_PATTERN = re.compile(
    r"""(?<![\w-])type\s*=\s*["']?([^"'\s>]+)""",
    re.IGNORECASE,
)

# ``type`` values whose content is JavaScript and therefore safe to minify.
_JS_SCRIPT_TYPES = frozenset({
    "text/javascript",
    "application/javascript",
    "text/ecmascript",
    "application/ecmascript",
    "module",
})


def _default_js_minifier():
    """Return the preferred installed JS minifier callable, or None."""
    if HAS_RJSMIN:
        return rjsmin.jsmin
    if HAS_JSMIN:
        return jsmin.jsmin
    return None


def _is_javascript(attrs):
    """Return True when a script tag's attribute text denotes JavaScript.

    A tag without a (non-empty) ``type`` attribute is treated as JavaScript.
    """
    found = _SCRIPT_TYPE_PATTERN.search(attrs)
    return found is None or found.group(1).lower() in _JS_SCRIPT_TYPES


def extract_and_minify_js(html_content, minifier=None):
    """Extract JavaScript, minify it, and reinsert into HTML.

    Args:
        html_content: The HTML document as a string.
        minifier: Optional callable ``str -> str`` used to minify each script
            body. Defaults to rjsmin if installed, otherwise jsmin.

    Returns:
        The HTML with each eligible inline script body minified. The input is
        returned unchanged when no minifier is available. Scripts that are
        shorter than ``MIN_SCRIPT_LENGTH``, that carry a non-JavaScript
        ``type`` (JSON, templates, ...), or whose minification raises are
        left exactly as they were.
    """
    if minifier is None:
        minifier = _default_js_minifier()
    if minifier is None:
        return html_content

    def minify_script_content(match):
        body = match.group("body")

        # Skip if empty or too small
        if len(body.strip()) < MIN_SCRIPT_LENGTH:
            return match.group(0)

        # Running a JS minifier over JSON or template payloads can corrupt them.
        if not _is_javascript(match.group("attrs")):
            return match.group(0)

        try:
            minified = minifier(body)
        except Exception as e:  # best effort: keep the original script
            print(f"Warning: JS minification failed: {e}")
            return match.group(0)

        return f"{match.group('open')}{minified}{match.group('close')}"

    return _SCRIPT_PATTERN.sub(minify_script_content, html_content)


def aggressive_html_minify(html_content):
    """Apply aggressive minification strategies.

    Args:
        html_content: The HTML document as a string.

    Returns:
        The minified HTML string.

    Raises:
        ImportError: If the ``minify_html`` package is not installed.
    """
    if minify_html is None:
        raise ImportError(
            "minify_html is not installed. Install with: pip install minify-html"
        )

    # First pass: minify JavaScript
    html_content = extract_and_minify_js(html_content)

    # Use minify_html library
    return minify_html.minify(
        html_content,
        minify_js=True,
        minify_css=True,
        keep_comments=False,
        keep_html_and_head_opening_tags=False,
        keep_closing_tags=True,  # Safer - can set to False for more aggressive
        remove_processing_instructions=True,
        remove_bangs=False,  # Keep <!DOCTYPE ...>
        # Further options that were left disabled:
        #   do_not_minify_doctype=True
        #   ensure_spec_compliant_unquoted_attribute_values=True
        #   keep_spaces_between_attributes=False
    )


def generate_c_header(data, variable_name="html_content", use_progmem=True,
                      progmem_include="Arduino.h"):
    """Generate C/C++ header file with compressed data.

    Args:
        data: The bytes to embed.
        variable_name: Name of the generated array; ``<variable_name>_len``
            holds its length and ``<VARIABLE_NAME>_H`` is the include guard.
        use_progmem: When true, mark the array ``PROGMEM`` and emit an
            ``#include`` for the header that defines it.
        progmem_include: Header (without angle brackets) included when
            ``use_progmem`` is true. NOTE(review): the header name was missing
            from the source this was restored from; confirm the default
            matches the target toolchain (e.g. ``pgmspace.h`` or
            ``avr/pgmspace.h``).

    Returns:
        The header text as a single string (no trailing newline).
    """
    guard = variable_name.upper() + "_H"
    lines = [f"#ifndef {guard}", f"#define {guard}", ""]

    # Add includes if using PROGMEM
    if use_progmem:
        lines.append(f"#include <{progmem_include}>")
        lines.append("")

    # Add array declaration
    progmem_keyword = "PROGMEM " if use_progmem else ""
    lines.append(f"const unsigned char {progmem_keyword}{variable_name}[] = {{")

    # Format bytes in rows of BYTES_PER_ROW
    for start in range(0, len(data), BYTES_PER_ROW):
        row = data[start:start + BYTES_PER_ROW]
        lines.append(" " + ", ".join(f"0x{byte:02x}" for byte in row) + ",")

    lines.extend([
        "};",
        "",
        f"const unsigned int {variable_name}_len = {len(data)};",
        "",
        f"#endif // {guard}",
    ])
    return "\n".join(lines)


def _percent_of(part, whole):
    """Return ``part`` as a percentage of ``whole`` (0.0 when ``whole`` is 0)."""
    return part / whole * 100 if whole else 0.0


def _percent_saved(part, whole):
    """Return the percentage reduction from ``whole`` to ``part`` (0.0 when ``whole`` is 0)."""
    return (1 - part / whole) * 100 if whole else 0.0


def print_compression_stats(original_size, minified_size, gzip_size, brotli_size=None):
    """Print compression statistics.

    Args:
        original_size: Size of the original HTML in bytes.
        minified_size: Size after minification in bytes.
        gzip_size: Size after gzip compression in bytes.
        brotli_size: Size after Brotli compression in bytes, or None when
            Brotli output was not produced.

    An ``original_size`` of 0 (empty input) reports 0.0% everywhere instead
    of raising ZeroDivisionError.
    """
    has_brotli = brotli_size is not None

    print("\n" + "=" * 60)
    print("COMPRESSION STATISTICS")
    print("=" * 60)
    print(f"Original HTML: {original_size:,} bytes")
    print(f"Minified HTML: {minified_size:,} bytes ({_percent_of(minified_size, original_size):.1f}%)")
    print(f"Gzip (level 9): {gzip_size:,} bytes ({_percent_of(gzip_size, original_size):.1f}%)")
    if has_brotli:
        print(f"Brotli (level 11): {brotli_size:,} bytes ({_percent_of(brotli_size, original_size):.1f}%)")
    print(f"\nSavings (gzip): {original_size - gzip_size:,} bytes ({_percent_saved(gzip_size, original_size):.1f}% reduction)")
    if has_brotli:
        print(f"Savings (brotli): {original_size - brotli_size:,} bytes ({_percent_saved(brotli_size, original_size):.1f}% reduction)")
    print("=" * 60 + "\n")


def main():
    """Minify ``webpage.html``, compress it, and write the C header files.

    Reads ``webpage.html`` from the current directory and writes
    ``webpage_minified.html``, ``webpage_gzip.h``, ``webpage.h`` and, when
    Brotli is available, ``webpage_brotli.h``. Exits with status 1 when the
    input file or the ``minify_html`` package is missing.
    """
    input_file = "webpage.html"

    if minify_html is None:
        print("Error: minify_html not installed. Install with: pip install minify-html")
        sys.exit(1)

    # Check if input file exists
    if not Path(input_file).exists():
        print(f"Error: {input_file} not found")
        sys.exit(1)

    # Read original HTML
    print(f"Reading {input_file}...")
    with open(input_file, "r", encoding="utf-8") as fin:
        original_html = fin.read()
    original_size = len(original_html.encode("utf-8"))

    # Minify HTML
    print("Minifying HTML/CSS/JS...")
    minified_html = aggressive_html_minify(original_html)
    minified_bytes = minified_html.encode("utf-8")
    minified_size = len(minified_bytes)

    # Save minified HTML
    with open("webpage_minified.html", "w", encoding="utf-8") as fout:
        fout.write(minified_html)
    print("Saved: webpage_minified.html")

    # Compress with gzip
    print("Compressing with gzip (level 9)...")
    gzipped_bytes = gzip.compress(minified_bytes, compresslevel=9)
    gzip_size = len(gzipped_bytes)

    # Compress with brotli (if available)
    brotli_size = None
    brotli_bytes = None
    if brotli is None:
        print("Brotli compression not available: brotli module not installed")
    else:
        try:
            print("Compressing with brotli (level 11)...")
            brotli_bytes = brotli.compress(minified_bytes, quality=11)
            brotli_size = len(brotli_bytes)
        except Exception as e:  # best effort: fall back to gzip only
            print(f"Brotli compression not available: {e}")

    # Generate C headers
    print("\nGenerating C header files...")

    # Gzip version
    with open("webpage_gzip.h", "w", encoding="utf-8") as fout:
        fout.write(generate_c_header(gzipped_bytes, "html_content_gz", use_progmem=True))
    print("Saved: webpage_gzip.h")

    # Brotli version (if available)
    if brotli_bytes:
        with open("webpage_brotli.h", "w", encoding="utf-8") as fout:
            fout.write(generate_c_header(brotli_bytes, "html_content_br", use_progmem=True))
        print("Saved: webpage_brotli.h")

    # Also generate the old format for compatibility.
    # NOTE(review): the array is plain `char`, so byte values >= 0x80 may
    # trigger narrowing diagnostics where `char` is signed; the type is kept
    # as-is because existing consumers of webpage.h may rely on it.
    with open("webpage.h", "w", encoding="utf-8") as fout:
        fout.write("const char html_content[] = {")
        fout.write(",".join(f"0x{byte:02x}" for byte in gzipped_bytes))
        fout.write("};\n\n")
        fout.write(f"const unsigned int html_content_len = {len(gzipped_bytes)};\n")
    print("Saved: webpage.h (legacy format)")

    # Print statistics
    print_compression_stats(original_size, minified_size, gzip_size, brotli_size)

    # Recommendations
    print("RECOMMENDATIONS:")
    print("-" * 60)

    if brotli_size is not None and brotli_size < gzip_size:
        savings = gzip_size - brotli_size
        print(f"Use Brotli compression (saves {savings} bytes vs gzip)")
        print(" Include: webpage_brotli.h")
    else:
        print("Use Gzip compression")
        print(" Include: webpage_gzip.h")

    if not HAS_RJSMIN and not HAS_JSMIN:
        print("\nInstall rjsmin for better JS compression:")
        print(" pip install rjsmin --break-system-packages")

    print("-" * 60)


if __name__ == "__main__":
    main()