System Memory Analyzer
Objective
Write a Python 3 command-line tool that reads raw memory statistics from /proc/meminfo, parses the key-value data, calculates derived metrics (used memory, free percentage, swap utilization, buffer/cache breakdown), and generates a formatted text or JSON report. The tool supports a --watch mode that samples memory at a specified interval, a --threshold flag that exits with a non-zero code if used memory exceeds a limit (useful in monitoring scripts), and a --json flag for machine-readable output.
Tools & Technologies
- Python 3.10+ — language
- /proc/meminfo — Linux kernel memory statistics interface
- argparse — CLI argument parsing (stdlib)
- json — JSON serialization for machine output (stdlib)
- time.sleep() — interval-based sampling in watch mode
- sys.exit() — threshold-based exit codes for monitoring integration
- unittest.mock — patching /proc/meminfo in tests
- dataclasses — typed MemInfo data container
Architecture Overview
Step-by-Step Process
Implemented a parser that reads the raw /proc/meminfo file and returns a dictionary mapping field names to integer values in kibibytes (kB).
#!/usr/bin/env python3
"""mem_analyzer.py — Linux /proc/meminfo parser and reporter"""
from __future__ import annotations
import argparse, json, sys, time
from dataclasses import dataclass, field, asdict
from pathlib import Path
from typing import Optional
MEMINFO_PATH = Path('/proc/meminfo')


def parse_meminfo(path: Path = MEMINFO_PATH) -> dict[str, int]:
    """Read a meminfo-style file and return its numeric fields.

    Each line is split on whitespace. The first token, minus any trailing
    colons, becomes the key and the second token becomes the integer value,
    e.g. ``'MemTotal: 32768000 kB'`` -> ``{'MemTotal': 32768000}``.

    Lines with fewer than two tokens, or whose second token is not an
    integer, are skipped. Any unit suffix after the number is ignored.

    Args:
        path: File to read; defaults to ``MEMINFO_PATH``.

    Returns:
        Mapping of field name to the integer found on that line.
    """
    fields: dict[str, int] = {}
    with path.open() as handle:
        for raw_line in handle:
            tokens = raw_line.split()
            if len(tokens) < 2:
                continue
            name, number = tokens[0], tokens[1]
            try:
                parsed = int(number)
            except ValueError:
                # Best-effort parser: ignore lines without an integer value.
                continue
            fields[name.rstrip(':')] = parsed
    return fields
Created a dataclass to hold all raw and computed memory fields. The __post_init__ method calculates derived values like used, available_percent, and swap utilization.
@dataclass
class MemInfo:
    """Memory figures in kB together with metrics derived from them.

    The first seven fields are constructor arguments. The remaining five are
    excluded from ``__init__`` and are computed in ``__post_init__``.
    """

    # Raw inputs (kB)
    total_kb: int
    free_kb: int
    available_kb: int
    buffers_kb: int
    cached_kb: int
    swap_total_kb: int
    swap_free_kb: int

    # Derived values, filled in by __post_init__
    used_kb: int = field(init=False)
    used_percent: float = field(init=False)
    available_percent: float = field(init=False)
    swap_used_kb: int = field(init=False)
    swap_used_percent: float = field(init=False)

    def __post_init__(self):
        """Compute the derived fields from the raw inputs."""
        # "used" is what remains after removing free, buffers and cached
        # memory from the total (the figure the 'free' command reports).
        not_used = self.free_kb + self.buffers_kb + self.cached_kb
        self.used_kb = self.total_kb - not_used

        # A zero total would divide by zero; report 0.0 percent instead.
        if self.total_kb:
            self.used_percent = self.used_kb / self.total_kb * 100
            self.available_percent = self.available_kb / self.total_kb * 100
        else:
            self.used_percent = 0.0
            self.available_percent = 0.0

        self.swap_used_kb = self.swap_total_kb - self.swap_free_kb
        # Same guard for machines without swap.
        if self.swap_total_kb:
            self.swap_used_percent = self.swap_used_kb / self.swap_total_kb * 100
        else:
            self.swap_used_percent = 0.0

    @classmethod
    def from_dict(cls, d: dict[str, int]) -> 'MemInfo':
        """Build an instance from a parsed meminfo mapping.

        Keys that are absent from *d* count as 0. ``cached_kb`` is the sum
        of the ``Cached`` and ``SReclaimable`` entries.
        """
        def kb(name: str) -> int:
            return d.get(name, 0)

        return cls(
            total_kb=kb('MemTotal'),
            free_kb=kb('MemFree'),
            available_kb=kb('MemAvailable'),
            buffers_kb=kb('Buffers'),
            cached_kb=kb('Cached') + kb('SReclaimable'),
            swap_total_kb=kb('SwapTotal'),
            swap_free_kb=kb('SwapFree'),
        )

    def to_mb(self, kb: int) -> float:
        """Return *kb* divided by 1024."""
        return kb / 1024

    def to_gb(self, kb: int) -> float:
        """Return *kb* divided by 1024 twice."""
        return self.to_mb(kb) / 1024
Wrote two report formatters: a human-readable table using f-strings, and a JSON serializer using the dataclass asdict() method.
def bar(percent: float, width: int = 30) -> str:
    """Render an ASCII progress bar followed by the numeric percentage.

    Args:
        percent: Value to display; 0-100 maps onto an empty-to-full bar.
        width: Number of character cells between the brackets.

    Returns:
        A string such as ``'[#####-----]  50.0%'``. The printed number is
        always the unmodified *percent*.
    """
    # Clamp the fill to [0, width]: without this a percent above 100 makes
    # the bar longer than `width`, and a negative percent pads it with extra
    # '-' cells, breaking the report's column alignment.
    filled = max(0, min(width, int(width * percent / 100)))
    return '[' + '#' * filled + '-' * (width - filled) + f'] {percent:5.1f}%'
def format_text_report(m: MemInfo) -> str:
    """Render *m* as a human-readable, multi-line text report.

    The report has a header, a RAM section (total, used, available, buffers,
    cache) and a swap section. When ``m.swap_total_kb`` is not positive the
    swap section is a single "not configured" line.

    Returns:
        The report lines joined with newlines (no trailing newline).
    """
    heavy_rule = "=" * 50
    light_rule = "-" * 50

    report = [heavy_rule, " SYSTEM MEMORY REPORT", heavy_rule]

    # RAM section: sizes in GB carry a usage bar, buffers/cache are in MB.
    report.append(f" Total RAM : {m.to_gb(m.total_kb):>7.2f} GB")
    report.append(f" Used : {m.to_gb(m.used_kb):>7.2f} GB {bar(m.used_percent)}")
    report.append(
        f" Available : {m.to_gb(m.available_kb):>7.2f} GB {bar(m.available_percent)}"
    )
    report.append(f" Buffers : {m.to_mb(m.buffers_kb):>7.1f} MB")
    report.append(f" Cache : {m.to_mb(m.cached_kb):>7.1f} MB")
    report.append(light_rule)

    # Swap section.
    if m.swap_total_kb > 0:
        report.append(f" Swap Total : {m.to_gb(m.swap_total_kb):>7.2f} GB")
        report.append(
            f" Swap Used : {m.to_gb(m.swap_used_kb):>7.2f} GB {bar(m.swap_used_percent)}"
        )
    else:
        report.append(" Swap: not configured")

    report.append(heavy_rule)
    return '\n'.join(report)
def format_json_report(m: MemInfo) -> str:
    """Serialize *m* to a JSON object with an added ``timestamp`` key.

    All dataclass fields are emitted via ``asdict``. ``timestamp`` is the
    current local time formatted as ``YYYY-MM-DDTHH:MM:SS``.

    Returns:
        The JSON document, indented by two spaces.
    """
    stamp = time.strftime('%Y-%m-%dT%H:%M:%S')
    payload = {**asdict(m), 'timestamp': stamp}
    return json.dumps(payload, indent=2)
Built the CLI with --watch for continuous sampling, --threshold for alerting, and --json for scripting integration.
def _watch_interval(text: str) -> float:
    """Convert a ``--watch`` argument to a finite, non-negative float.

    Raises:
        argparse.ArgumentTypeError: if *text* is not a number, or is
            negative, NaN or infinite. argparse turns this into a normal
            usage error.
    """
    try:
        seconds = float(text)
    except ValueError:
        raise argparse.ArgumentTypeError(f"invalid float value: {text!r}") from None
    # The chained comparison is False for NaN, so NaN is rejected here too.
    if not 0 <= seconds < float('inf'):
        raise argparse.ArgumentTypeError(
            f"interval must be a finite, non-negative number of seconds: {text!r}"
        )
    return seconds


def build_parser() -> argparse.ArgumentParser:
    """Create the command-line parser for mem_analyzer.

    Options:
        --json       emit JSON instead of the text report.
        --watch      sampling interval in seconds; validated up front so a
                     negative or non-finite value is a usage error instead
                     of a ``time.sleep()`` failure after the first report.
        --threshold  used-memory percentage for the alert exit code.
        --meminfo    alternative meminfo file (default: ``MEMINFO_PATH``).

    Returns:
        The configured ``argparse.ArgumentParser``.
    """
    p = argparse.ArgumentParser(
        prog='mem_analyzer',
        description='Analyze Linux system memory from /proc/meminfo'
    )
    p.add_argument('--json', action='store_true', help='Output JSON')
    p.add_argument('--watch', type=_watch_interval, metavar='SECONDS',
                   help='Repeat every N seconds (Ctrl+C to stop)')
    p.add_argument('--threshold', type=float, metavar='PERCENT',
                   help='Exit code 2 if used%% exceeds this value')
    p.add_argument('--meminfo', type=Path, default=MEMINFO_PATH,
                   help='Path to meminfo file (default: /proc/meminfo)')
    return p
def main() -> None:
    """Command-line entry point.

    Parses the arguments, then prints one report, or with ``--watch`` a
    report every N seconds until interrupted with Ctrl+C.

    Exit status:
        2 -- ``--threshold`` was given and the used percentage exceeds it.
        3 -- the meminfo file could not be read.
    """
    args = build_parser().parse_args()

    def run_once() -> None:
        """Take one sample, print it, and enforce the threshold."""
        try:
            raw = parse_meminfo(args.meminfo)
        except OSError as exc:
            # An unhandled error would end in a traceback with exit status 1,
            # which a monitoring wrapper using the 0/1/2/3 plugin convention
            # would read as WARNING. Report it and exit 3 (UNKNOWN) instead.
            print(f"ERROR: cannot read {args.meminfo}: {exc}", file=sys.stderr)
            sys.exit(3)

        mem = MemInfo.from_dict(raw)
        if args.json:
            print(format_json_report(mem))
        else:
            print(format_text_report(mem))

        if args.threshold is not None and mem.used_percent > args.threshold:
            print(f"\nWARNING: Memory usage {mem.used_percent:.1f}% exceeds "
                  f"threshold {args.threshold:.1f}%", file=sys.stderr)
            sys.exit(2)

    if args.watch:
        try:
            while True:
                run_once()
                time.sleep(args.watch)
        except KeyboardInterrupt:
            # Goes to stderr so that stdout stays valid for --json consumers.
            print("\nStopped.", file=sys.stderr)
    else:
        run_once()


if __name__ == '__main__':
    main()
Used unittest.mock.patch to feed a synthetic meminfo string to the parser in tests, decoupling the test suite from the actual system memory state.
import pytest
from pathlib import Path
from unittest.mock import patch, mock_open
from mem_analyzer import parse_meminfo, MemInfo
FAKE_MEMINFO = """MemTotal: 16384000 kB
MemFree: 4096000 kB
MemAvailable: 8192000 kB
Buffers: 512000 kB
Cached: 2048000 kB
SReclaimable: 256000 kB
SwapTotal: 4096000 kB
SwapFree: 2048000 kB
"""


def test_parse_meminfo():
    """parse_meminfo returns the integer values from a synthetic meminfo."""
    # parse_meminfo calls Path.open(), which goes through io.open and never
    # looks up builtins.open. Patching builtins.open would leave the real
    # /proc/meminfo being read, so patch Path.open itself.
    with patch.object(Path, 'open', mock_open(read_data=FAKE_MEMINFO)):
        result = parse_meminfo(Path('/proc/meminfo'))
    assert result['MemTotal'] == 16384000
    assert result['SwapFree'] == 2048000
def test_meminfo_used_calculation():
    """Derived fields of MemInfo follow from the raw meminfo values."""
    total, free, buffers = 16384000, 4096000, 512000
    cached, sreclaimable = 2048000, 256000
    sample = {
        'MemTotal': total,
        'MemFree': free,
        'MemAvailable': 8192000,
        'Buffers': buffers,
        'Cached': cached,
        'SReclaimable': sreclaimable,
        'SwapTotal': 4096000,
        'SwapFree': 2048000,
    }

    info = MemInfo.from_dict(sample)

    # used = total - free - buffers - (cached + sreclaimable)
    assert info.used_kb == total - free - buffers - (cached + sreclaimable)
    assert 0 < info.used_percent < 100
    # Half of the swap is free, so half is in use.
    assert info.swap_used_percent == 50.0
Complete Workflow
Challenges & Solutions
- Discrepancy between tool output and the `free` command — The Linux `free` command includes `SReclaimable` in its cached figure. Had to add `SReclaimable` to the cached total to match the `free` output exactly.
- Watch mode leaving terminal in a bad state on interrupt — `KeyboardInterrupt` was bubbling past the print statement, leaving partial output. Added a try/except around the watch loop with a clean "Stopped." message.
- Test failures on systems with no swap — When `SwapTotal = 0`, calculating `swap_used_percent` caused a ZeroDivisionError. Added a conditional: if swap_total is 0, set all swap percentages to 0.0.
- Dataclass field ordering with `field(init=False)` — Python dataclasses require fields with defaults (init=False) to appear after fields without defaults. Restructured the class to separate raw (init=True) and derived (init=False) fields.
Key Takeaways
- `/proc/meminfo` is one of many Linux virtual filesystem interfaces that expose kernel data as plain text — parsing these files directly is how low-level monitoring tools like `free`, `top`, and `vmstat` work internally.
- Dataclasses with `__post_init__` are an excellent pattern for data that requires derived fields — the computation is centralized and typed, unlike scattered calculations throughout the code.
- Exit codes are a contract between programs — using code 2 for threshold violations makes the tool composable with Nagios/Icinga monitoring systems that expect 0 (OK), 1 (WARNING), 2 (CRITICAL).
- Mocking file I/O in tests decouples unit tests from the runtime environment, making the test suite portable and deterministic across different machines with different memory configurations.