cheat sheet
Python CLI Snippets
Short standalone Python scripts for common tasks — file I/O, JSON manipulation, HTTP requests, CSV processing, and data transformation.
Python CLI Snippets
Standalone scripts — save each to a .py file and run with python script.py.
File I/O — read, filter, write
Read a text file, filter lines containing a keyword, and write results to a new file.
#!/usr/bin/env python3
# filter_lines.py
"""Copy the lines of a text file that contain a keyword (case-insensitive)."""
import sys

# Positional arguments, each optional: keyword, source path, destination path.
cli = sys.argv[1:]
keyword = cli[0] if len(cli) >= 1 else "error"
src = cli[1] if len(cli) >= 2 else "input.txt"
dst = cli[2] if len(cli) >= 3 else "output.txt"

with open(src) as f:
    lines = f.readlines()

# Compare lower-cased text on both sides so the match ignores case.
needle = keyword.lower()
matched = []
for line in lines:
    if needle in line.lower():
        matched.append(line)

with open(dst, "w") as f:
    f.writelines(matched)

print(f"Filtered {len(matched)}/{len(lines)} lines → {dst}")
# Create a sample input
printf "INFO: server started\nERROR: connection refused\nINFO: done\nERROR: timeout\n" > input.txt
python filter_lines.py error input.txt errors.txt
Output:
Filtered 2/4 lines → errors.txt
errors.txt contents:
ERROR: connection refused
ERROR: timeout
JSON — load, transform, dump
Read a JSON file, add/transform a field, and write the result back out.
#!/usr/bin/env python3
# transform_json.py
"""Add a computed ``label`` field to every record of a JSON array."""
import json
import sys
from pathlib import Path

# Optional positional arguments: input path, then output path.
src = Path(sys.argv[1] if len(sys.argv) > 1 else "data.json")
dst = Path(sys.argv[2] if len(sys.argv) > 2 else "data_out.json")

records = json.loads(src.read_text())

# Each record gains a label built from its existing name and age fields.
for rec in records:
    name, age = rec["name"], rec["age"]
    rec["label"] = f"{name} (age {age})"

serialized = json.dumps(records, indent=2)
dst.write_text(serialized)
print(f"Wrote {len(records)} records to {dst}")
# Create sample input
echo '[{"name":"Alice","age":30},{"name":"Bob","age":25}]' > data.json
python transform_json.py data.json data_out.json
cat data_out.json
Output:
Wrote 2 records to data_out.json
data_out.json contents:
[
{
"name": "Alice",
"age": 30,
"label": "Alice (age 30)"
},
{
"name": "Bob",
"age": 25,
"label": "Bob (age 25)"
}
]
HTTP request — stdlib (no dependencies)
Fetch a URL and print the response body using only the standard library.
#!/usr/bin/env python3
# fetch.py
"""Fetch a URL with the standard library and print its status and body."""
import json
import sys
import urllib.request


def render_body(body: str) -> str:
    """Return the text to display for a response body.

    A JSON object is reduced to its "headers" member when it has one;
    any other JSON value is pretty-printed whole. A body that is not JSON
    is truncated to its first 500 characters.
    """
    try:
        parsed = json.loads(body)
    except json.JSONDecodeError:
        return body[:500]
    # Only objects have .get(); arrays and scalars are printed unchanged.
    if isinstance(parsed, dict):
        parsed = parsed.get("headers", parsed)
    return json.dumps(parsed, indent=2)


def main(argv: list[str]) -> int:
    """Fetch argv[0] (or the default URL) and print status plus body."""
    url = argv[0] if argv else "https://httpbin.org/get"
    with urllib.request.urlopen(url, timeout=10) as resp:
        # Use the charset the server declared; fall back to UTF-8 and
        # replace undecodable bytes instead of aborting.
        charset = resp.headers.get_content_charset() or "utf-8"
        body = resp.read().decode(charset, errors="replace")
        status = resp.status
    print(f"Status: {status}")
    print(render_body(body))
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv[1:]))
python fetch.py https://httpbin.org/get
Output:
Status: 200
{
"Accept": "*/*",
"Accept-Encoding": "identity",
"Host": "httpbin.org",
"User-Agent": "Python-urllib/3.12"
}
For POST, auth, session reuse, or retries, use requests or httpx instead.
HTTP request — with requests
#!/usr/bin/env python3
# post_json.py
"""POST a small JSON payload and print the JSON the server echoes back."""
import json
import sys

import requests

url = "https://httpbin.org/post"
payload = dict(action="test", value=42)

resp = requests.post(url, json=payload, timeout=10)
resp.raise_for_status()
result = resp.json()

print(f"Status: {resp.status_code}")
# The echoed request body is read from the "json" key of the response.
echoed = json.dumps(result["json"], indent=2)
print("Echo'd JSON:", echoed)
python post_json.py
Output:
Status: 200
Echo'd JSON: {
"action": "test",
"value": 42
}
CSV processing — compute summary stats
Read a CSV file and print per-column min, max, and mean for numeric columns.
#!/usr/bin/env python3
# csv_stats.py
"""Print min, max and mean for every numeric column of a CSV file."""
import csv
import sys
from collections import defaultdict


def numeric_columns(rows) -> dict[str, list[float]]:
    """Collect the cells that parse as floats, grouped by column name.

    rows is an iterable of dicts such as csv.DictReader yields. Cells that
    cannot be converted are skipped, and a column without a single numeric
    cell does not appear in the result.
    """
    columns: dict[str, list[float]] = defaultdict(list)
    for row in rows:
        for key, val in row.items():
            # Convert BEFORE indexing the defaultdict: evaluating
            # columns[key] first would register an empty list for a text
            # column, and min()/max() of an empty list raise ValueError.
            try:
                number = float(val)
            except (ValueError, TypeError):
                continue
            columns[key].append(number)
    return dict(columns)


def main(argv: list[str]) -> int:
    """Print the statistics table for argv[0] (default: data.csv)."""
    path = argv[0] if argv else "data.csv"
    with open(path, newline="") as f:
        columns = numeric_columns(csv.DictReader(f))
    print(f"{'Column':<20} {'Min':>10} {'Max':>10} {'Mean':>10}")
    print("-" * 54)
    for col, values in columns.items():
        mean = sum(values) / len(values)
        print(f"{col:<20} {min(values):>10.2f} {max(values):>10.2f} {mean:>10.2f}")
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv[1:]))
printf "name,age,salary\nAlice,30,90000\nBob,25,75000\nCarol,35,105000\n" > data.csv
python csv_stats.py data.csv
Output:
Column Min Max Mean
------------------------------------------------------
age 25.00 35.00 30.00
salary 75000.00 105000.00 90000.00
Data processing — word frequency counter
Count word frequencies in a text file and print the top N.
#!/usr/bin/env python3
# word_freq.py
"""Print the most frequent words of a text file with a proportional bar."""
import re
import sys
from collections import Counter
from pathlib import Path

WORD_RE = re.compile(r"\b[a-z]{3,}\b")  # only words ≥ 3 chars
BAR_WIDTH = 20  # bar length given to the most frequent word


def top_words(text: str, top_n: int) -> tuple[int, list[tuple[str, int, str]]]:
    """Return (total word count, [(word, count, bar), ...]) for text.

    Words are matched case-insensitively. The bar of each word is scaled
    against the count of the most frequent word, which is looked up once
    instead of once per printed row.
    """
    words = WORD_RE.findall(text.lower())
    ranked = Counter(words).most_common(top_n)
    if not ranked:
        return len(words), []
    peak = ranked[0][1]  # most_common() is sorted, so the first is the maximum
    rows = [(word, count, "█" * (count * BAR_WIDTH // peak)) for word, count in ranked]
    return len(words), rows


def main(argv: list[str]) -> int:
    """Print the top-N table for argv[0] (default text.txt), N = argv[1] or 10."""
    path = Path(argv[0]) if argv else Path("text.txt")
    top_n = int(argv[1]) if len(argv) > 1 else 10
    text = path.read_text(encoding="utf-8", errors="ignore")
    total, rows = top_words(text, top_n)
    print(f"Top {top_n} words in {path.name} ({total} total):\n")
    for word, count, bar in rows:
        print(f" {word:<15} {count:>5} {bar}")
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv[1:]))
echo "the quick brown fox jumps over the lazy dog the fox" > text.txt
python word_freq.py text.txt 5
Output:
Top 5 words in text.txt (11 total):
the 3 ████████████████████
fox 2 █████████████
quick 1 ██████
brown 1 ██████
jumps 1 ██████
Walk a directory tree
Print all .py files under a directory with their sizes.
#!/usr/bin/env python3
# find_py.py
"""List every .py file under a directory together with its size."""
import sys
from pathlib import Path


def py_file_sizes(root: Path) -> list[tuple[int, Path]]:
    """Return (size in bytes, path) for each *.py match under root, sorted by path."""
    return [(p.stat().st_size, p) for p in sorted(root.rglob("*.py"))]


def main(argv: list[str]) -> int:
    """Print the listing and totals for argv[0] (default: current directory)."""
    root = Path(argv[0]) if argv else Path(".")
    # Walk the tree once; the total and the file count both come from this
    # list rather than from a second rglob() pass.
    found = py_file_sizes(root)
    for size, p in found:
        print(f"{size:>8,} B {p}")
    total = sum(size for size, _ in found)
    print(f"\nTotal: {total:,} bytes across {len(found)} files")
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv[1:]))
python find_py.py src/
Output:
4,821 B src/app.py
847 B src/models.py
1,203 B src/utils.py
Total: 6,871 bytes across 3 files
Argparse boilerplate — the 30-second template
Almost every Python CLI starts with the same argparse scaffold: a parser, a positional input, a couple of optional flags, and a main() function that returns an exit code. Copy-paste this and edit. For the deep version see argparse; for typed alternatives see click and typer.
#!/usr/bin/env python3
# cli_template.py
"""One-line description of the tool."""
from __future__ import annotations

import argparse
import sys
from pathlib import Path


def build_parser() -> argparse.ArgumentParser:
    """Create the parser: one positional input plus -o, -v and -n options."""
    p = argparse.ArgumentParser(prog="mytool", description=__doc__)
    p.add_argument("input", type=Path, help="Input file")
    p.add_argument("-o", "--output", type=Path, default=Path("-"),
                   help="Output file or '-' for stdout (default: stdout)")
    p.add_argument("-v", "--verbose", action="count", default=0,
                   help="Increase verbosity (-v, -vv, -vvv)")
    # Every other flag documents itself in --help; this one now does too.
    p.add_argument("-n", "--dry-run", action="store_true",
                   help="Report what would happen without changing anything")
    return p


def main(argv: list[str] | None = None) -> int:
    """Run the tool and return its exit code.

    argv is the argument list without the program name; None makes
    argparse read sys.argv. Returns 2 when the input is not an existing
    file and 0 otherwise.
    """
    args = build_parser().parse_args(argv)
    if args.verbose:
        print(f"args: {args}", file=sys.stderr)
    if not args.input.is_file():
        print(f"error: {args.input} not found", file=sys.stderr)
        return 2
    if args.dry_run:
        print("(dry run; no changes)")
        return 0
    # ... real work ...
    return 0


if __name__ == "__main__":
    sys.exit(main())
python cli_template.py data.txt -o out.txt -vv
Output:
args: Namespace(input=PosixPath('data.txt'), output=PosixPath('out.txt'), verbose=2, dry_run=False)
Returning an exit code from main() (instead of calling sys.exit(n) inside it) makes the function unit-testable.
Environment-variable configuration
A robust CLI reads configuration from three sources, in increasing precedence: built-in defaults → environment variables → command-line flags. The pattern below passes os.environ.get (with a fallback) as the default= for each flag, so users can either set MYAPP_HOST=… in their shell or pass --host … on the command line.
#!/usr/bin/env python3
# env_config.py
"""Resolve settings from defaults, then environment variables, then flags."""
import argparse
import os
import sys


def env(name: str, fallback: str | None = None) -> dict:
    """Build add_argument() keywords whose default is read from an env var.

    The default is the value of the variable name, or fallback when the
    variable is unset; the help text names the variable.
    """
    value = os.environ.get(name, fallback)
    return dict(default=value, help=f"(env: {name})")


parser = argparse.ArgumentParser()
parser.add_argument("--host", **env("MYAPP_HOST", "localhost"))
parser.add_argument("--port", type=int, **env("MYAPP_PORT", "8080"))
# The flag is mandatory only when the environment does not supply the key.
key_missing = "MYAPP_API_KEY" not in os.environ
parser.add_argument("--api-key", required=key_missing, **env("MYAPP_API_KEY"))
args = parser.parse_args()

key_prefix = args.api_key[:6]
print(f"connecting to {args.host}:{args.port} (key {key_prefix}…)")
MYAPP_API_KEY=sk-1234567890 MYAPP_HOST=myhost.local python env_config.py --port 9000
Output:
connecting to myhost.local:9000 (key sk-123…)
For full .env file loading, python-dotenv reads a .env file into os.environ before argument parsing runs.
JSON in/out — stdin to stdout filter
A Unix-style filter that reads JSON from stdin, transforms it, and writes JSON to stdout. This is the shape of every jq-replacement script written in Python.
#!/usr/bin/env python3
# json_filter.py
"""Read JSON array from stdin, filter by --min-age, write JSON to stdout."""
import argparse
import json
import sys
parser = argparse.ArgumentParser()
parser.add_argument("--min-age", type=int, default=0)
parser.add_argument("--field", default="age",
help="Field name to filter on (default: age)")
args = parser.parse_args()
try:
data = json.load(sys.stdin)
except json.JSONDecodeError as e:
print(f"error: bad JSON on stdin: {e}", file=sys.stderr)
sys.exit(2)
filtered = [r for r in data if r.get(args.field, 0) >= args.min_age]
json.dump(filtered, sys.stdout, indent=2)
sys.stdout.write("\n")
echo '[{"name":"Alice","age":30},{"name":"Bob","age":17},{"name":"Carol","age":42}]' \
| python json_filter.py --min-age 18
Output:
[
{
"name": "Alice",
"age": 30
},
{
"name": "Carol",
"age": 42
}
]
Pair this with jq for the parts of the pipeline that jq handles natively (selecting fields, formatting) and reach for Python only when you need real logic.
CSV in/out — round-trip with a derived column
Read a CSV from stdin (or a path), compute a new column from existing ones, and write the result back out. Use csv.DictReader / csv.DictWriter for named columns and you don't have to remember positional indices.
#!/usr/bin/env python3
# csv_derive.py
"""Copy a CSV while appending a ``bonus`` column equal to 10% of ``salary``."""
import contextlib
import csv
import sys
from pathlib import Path


def add_bonus(infile, outfile) -> None:
    """Read CSV rows from infile and write them, plus ``bonus``, to outfile.

    Both arguments are open text streams. Every input row needs a
    ``salary`` cell that float() accepts.
    """
    reader = csv.DictReader(infile)
    fieldnames = list(reader.fieldnames or []) + ["bonus"]
    writer = csv.DictWriter(outfile, fieldnames=fieldnames)
    writer.writeheader()
    for row in reader:
        row["bonus"] = round(float(row["salary"]) * 0.1, 2)
        writer.writerow(row)


def main(argv: list[str]) -> int:
    """Use argv[0] / argv[1] as input / output paths, else stdin / stdout."""
    src = Path(argv[0]) if len(argv) > 0 else None
    dst = Path(argv[1]) if len(argv) > 1 else None
    # ExitStack closes whichever files were opened here, even when a row
    # fails to convert; stdin and stdout are never registered, so they
    # stay open.
    with contextlib.ExitStack() as stack:
        infile = stack.enter_context(src.open(newline="")) if src else sys.stdin
        outfile = stack.enter_context(dst.open("w", newline="")) if dst else sys.stdout
        add_bonus(infile, outfile)
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv[1:]))
printf "name,salary\nAlice,90000\nBob,75000\n" | python csv_derive.py
Output:
name,salary,bonus
Alice,90000,9000.0
Bob,75000,7500.0
Coloured output — ANSI escapes (no dependency)
For trivial colour needs, you can write ANSI escape codes by hand. Modern terminals (and Windows 10+ with os.system("") first to enable VT) support them natively. Reach for rich or colorama when you need styles, tables, or cross-platform safety.
#!/usr/bin/env python3
# color_basic.py
"""Print coloured status lines using hand-written ANSI escape sequences."""
import os
import sys

if os.name == "nt":
    os.system("")  # enables ANSI on Windows 10+ cmd.exe

# Escape sequences: four foreground colours, bold, and the reset code.
R = "\033[31m"
G = "\033[32m"
Y = "\033[33m"
B = "\033[34m"
END = "\033[0m"
BOLD = "\033[1m"


def color(text: str, code: str) -> str:
    """Return text wrapped in code…END when stdout is a terminal, else text."""
    if not sys.stdout.isatty():
        return text
    return f"{code}{text}{END}"


print(color("ERROR: ", R) + "connection refused")
print(color("WARN: ", Y) + "retrying in 5s")
print(color("OK: ", G) + color("connected", BOLD))
python color_basic.py
Output:
ERROR: connection refused
WARN: retrying in 5s
OK: connected
The sys.stdout.isatty() guard means colour codes are omitted when the output is redirected (python script.py > log.txt). Without it, your log file fills up with \033[31m garbage.
Coloured output — rich
rich is the modern standard for terminal output: colour, tables, tracebacks, progress bars, markdown. Use it as soon as you need anything beyond plain print.
#!/usr/bin/env python3
# color_rich.py
"""Show styled messages, a logged line, and a table using rich."""
from rich.console import Console
from rich.table import Table

console = Console()
console.print("[bold green]Success[/]: deployment complete")
console.print("[yellow]Warning[/]: 2 tests skipped")
console.log("starting…")  # adds a timestamp

# Column headers paired with the keyword options each one is created with.
table = Table(title="Recent deploys")
for header, options in (
    ("Service", {"style": "cyan"}),
    ("Status", {"style": "green"}),
    ("Time", {"justify": "right"}),
):
    table.add_column(header, **options)
for deploy in (("api", "ok", "1.2s"), ("worker", "failed", "2.1s")):
    table.add_row(*deploy)
console.print(table)
python color_rich.py
Output:
Success: deployment complete
Warning: 2 tests skipped
[14:32:01] starting…
Recent deploys
┏━━━━━━━━━┳━━━━━━━━┳━━━━━━┓
┃ Service ┃ Status ┃ Time ┃
┡━━━━━━━━━╇━━━━━━━━╇━━━━━━┩
│ api │ ok │ 1.2s │
│ worker │ failed │ 2.1s │
└─────────┴────────┴──────┘
Interactive prompts
The stdlib input() covers most one-off prompts. For password input, getpass.getpass() echoes nothing. For yes/no, write a tiny helper that handles the common variants — or use rich.prompt.Confirm / click.confirm for batteries-included.
#!/usr/bin/env python3
# prompts.py
"""Ask for a name, a hidden token, and a yes/no confirmation."""
import getpass
import sys


def ask_yn(question: str, default: bool = False) -> bool:
    """Ask a yes/no question until the reply is recognised.

    An empty reply returns default; the capitalised letter in the prompt
    suffix shows which answer that is. Accepts y/yes and n/no in any case.
    """
    prompt = question + (" [Y/n] " if default else " [y/N] ")
    replies = {"y": True, "yes": True, "n": False, "no": False}
    while True:
        ans = input(prompt).strip().lower()
        if ans == "":
            return default
        if ans in replies:
            return replies[ans]
        print("Please answer y or n.")


name = input("Your name: ")
password = getpass.getpass("API token: ")
confirmed = ask_yn(f"Deploy as {name}?", default=False)
if not confirmed:
    print("aborted")
    sys.exit(1)
# Only the token's length is shown, never its value.
print(f"deploying… (token length: {len(password)})")
python prompts.py
Output:
Your name: Alice
API token:
Deploy as Alice? [y/N] y
deploying… (token length: 32)
Never print or log the value returned by getpass.getpass(). The length is fine; the value is not.
Progress bars — tqdm
tqdm wraps any iterable into a progress bar. It auto-detects the terminal width, hides when output is redirected, and supports nested bars.
#!/usr/bin/env python3
# progress_tqdm.py
"""Show a tqdm progress bar over a list of twenty file names."""
import time

from tqdm import tqdm

files = list(map("file{}.bin".format, range(20)))
uploads = tqdm(files, desc="Uploading", unit="file")
for f in uploads:
    time.sleep(0.05)  # stands in for the work done per file
python progress_tqdm.py
Output:
Uploading: 100%|█████████████████████████| 20/20 [00:01<00:00, 19.4 file/s]
For nested loops:
from tqdm import tqdm

# One outer bar over the batches, one inner bar per batch (leave=False).
outer = tqdm(range(3), desc="Batches")
for batch in outer:
    inner = tqdm(range(50), desc=f" batch {batch}", leave=False)
    for item in inner:
        ...
Progress bars — rich.progress
rich.progress provides a multi-task, multi-column progress display that's much fancier than tqdm when you have parallel work.
#!/usr/bin/env python3
# progress_rich.py
"""Drive two rich progress tasks side by side until both are finished."""
import time

from rich.progress import Progress, SpinnerColumn, BarColumn, TextColumn, TimeRemainingColumn

# Display columns, left to right.
columns = (
    SpinnerColumn(),
    TextColumn("[bold blue]{task.description}"),
    BarColumn(),
    "[progress.percentage]{task.percentage:>3.0f}%",
    TimeRemainingColumn(),
)

with Progress(*columns) as progress:
    download = progress.add_task("download", total=100)
    process = progress.add_task("process", total=100)
    # Per pass, download advances by 2 and process by 1; the loop runs
    # until the Progress object reports itself finished.
    steps = ((download, 2), (process, 1))
    while not progress.finished:
        for task_id, amount in steps:
            progress.update(task_id, advance=amount)
        time.sleep(0.02)
python progress_rich.py
Output:
⠼ download ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 100% 0:00:00
⠼ process ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 100% 0:00:00
Logging setup — the standard 5-line config
Avoid print for anything beyond throwaway scripts. logging.basicConfig sets up the root logger; module-level logger = logging.getLogger(__name__) gives you per-module filtering. The format below covers timestamp, level, module, and message — everything you need to debug a production incident.
#!/usr/bin/env python3
# logging_setup.py
"""Configure the root logger from LOG_LEVEL and emit a few sample records."""
import logging
import os

logging.basicConfig(
    # Level names are upper-case; normalise so LOG_LEVEL=debug is accepted
    # instead of making basicConfig raise ValueError.
    level=os.environ.get("LOG_LEVEL", "INFO").upper(),
    format="%(asctime)s %(levelname)-8s %(name)s: %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S",
)
logger = logging.getLogger(__name__)


def do_work(n: int) -> int:
    """Return n squared, logging the call and warning on negative input."""
    logger.debug("called with n=%s", n)
    if n < 0:
        logger.warning("negative input: %s", n)
    return n * n


logger.info("starting")
do_work(-3)
do_work(5)
logger.info("done")
LOG_LEVEL=DEBUG python logging_setup.py
Output:
2026-05-25 14:30:00 INFO __main__: starting
2026-05-25 14:30:00 DEBUG __main__: called with n=-3
2026-05-25 14:30:00 WARNING __main__: negative input: -3
2026-05-25 14:30:00 DEBUG __main__: called with n=5
2026-05-25 14:30:00 INFO __main__: done
Use %-style formatting in log calls (logger.info("got %d", n)), not f-strings. The message is only formatted if the level is enabled — important for hot paths.
Error handling and exit codes
A well-behaved CLI distinguishes failure modes by exit code: 0 success, 1 general error, 2 bad usage, >2 for application-specific errors. Catch expected exceptions, log unexpected ones, and propagate the right code. Anything that prints to stderr should also flush.
#!/usr/bin/env python3
# error_handling.py
"""Read a file and report each failure mode with its own exit code."""
import logging
import sys
from pathlib import Path

logging.basicConfig(level="INFO", format="%(levelname)s: %(message)s")
log = logging.getLogger(__name__)

# Exit codes. The generic failure is named like the others rather than
# being a bare 1 at the return site.
EXIT_OK = 0
EXIT_ERROR = 1
EXIT_USAGE = 2
EXIT_NOT_FOUND = 3
EXIT_PERMISSION = 4


def main(argv: list[str]) -> int:
    """Read the file named by the single argument and return an exit code.

    Returns EXIT_USAGE unless exactly one argument is given,
    EXIT_NOT_FOUND or EXIT_PERMISSION for those two errors, EXIT_ERROR
    for any other exception (logged with its traceback), else EXIT_OK.
    """
    if len(argv) != 1:
        print("usage: tool <file>", file=sys.stderr)
        return EXIT_USAGE
    path = Path(argv[0])
    try:
        text = path.read_text()
    except FileNotFoundError:
        log.error("no such file: %s", path)
        return EXIT_NOT_FOUND
    except PermissionError:
        log.error("permission denied: %s", path)
        return EXIT_PERMISSION
    except Exception:
        # Top-level boundary: record the traceback, then fail generically.
        log.exception("unexpected error")
        return EXIT_ERROR
    print(f"read {len(text)} chars from {path}")
    return EXIT_OK


if __name__ == "__main__":
    sys.exit(main(sys.argv[1:]))
python error_handling.py missing.txt
echo $?
Output:
ERROR: no such file: missing.txt
3
Signal handling — graceful shutdown on Ctrl-C
Catching SIGINT (Ctrl-C) and SIGTERM (kill default) lets your script flush state and clean up before exit. signal.signal() registers a handler; raise a sentinel exception or set a shutdown_requested flag and let your main loop check it.
#!/usr/bin/env python3
# signals.py
"""Run a tick loop until SIGINT or SIGTERM asks it to stop."""
import signal
import sys
import time

shutdown = False  # set by the signal handler, polled by the main loop


def handle(signum: int, frame) -> None:
    """Announce the received signal on stderr and request shutdown."""
    global shutdown
    name = signal.Signals(signum).name
    print(f"\n[signal] {name} received; finishing up…", file=sys.stderr)
    shutdown = True


signal.signal(signal.SIGINT, handle)
signal.signal(signal.SIGTERM, handle)

print("working… (Ctrl-C to stop)")
while not shutdown:
    time.sleep(0.5)
    # The handler may have run during the sleep; check again so that no
    # tick is printed after shutdown has been requested.
    if shutdown:
        break
    print(" tick")
print("clean exit")
python signals.py
# press Ctrl-C
Output:
working… (Ctrl-C to stop)
tick
tick
^C
[signal] SIGINT received; finishing up…
clean exit
Inside async code use loop.add_signal_handler(signal.SIGINT, callback) instead — signal.signal works but doesn't interrupt blocking awaits reliably.
Streaming stdin line by line
Reading stdin line-by-line is the foundation of every Unix-style filter. for line in sys.stdin: iterates lazily and handles EOF correctly. Combine with argparse.FileType("r") (see argparse) or fileinput for input from files-or-stdin.
#!/usr/bin/env python3
# stdin_stream.py
"""Echo each input line prefixed with its line number and length."""
import sys

for i, raw in enumerate(sys.stdin, start=1):
    line = raw.rstrip("\n")  # drop the line terminator before measuring
    width = len(line)
    print(f"{i:>4d} ({width:>3d}) {line}")
printf "first\nsecond line\nthird\n" | python stdin_stream.py
Output:
1 ( 5) first
2 ( 11) second line
3 ( 5) third
The fileinput module is a clever stdlib helper that iterates lines from sys.argv files or stdin, with no boilerplate:
#!/usr/bin/env python3
# fileinput_demo.py
"""Print every input line prefixed with its source name and line number."""
import fileinput

for line in fileinput.input():
    source = fileinput.filename()
    number = fileinput.lineno()
    print(f"{source}:{number}: {line.rstrip()}")
python fileinput_demo.py a.txt b.txt < /dev/null
# or piped:
echo "hi" | python fileinput_demo.py
Output:
<stdin>:1: hi
Single-file scripts with PEP 723 + uv run
PEP 723 lets a single .py file declare its dependencies in an inline metadata block. uv run script.py (with uv installed) sets up a temporary virtualenv with those dependencies, runs the script, and tears down — no requirements.txt, no manual venv. This is the cleanest way to ship a standalone Python tool.
#!/usr/bin/env -S uv run --script
# /// script
# requires-python = ">=3.11"
# dependencies = [
#   "httpx>=0.27",
#   "rich>=13.7",
# ]
# ///
"""Fetch a URL and pretty-print its response headers."""
import sys

import httpx
from rich import print as rprint

# The first command-line argument, when given, replaces the default URL.
args = sys.argv[1:]
url = args[0] if args else "https://example.com"

with httpx.Client() as client:
    r = client.get(url, timeout=10, follow_redirects=True)
    status_line = f"[bold green]{r.status_code}[/] [cyan]{r.url}[/]"
    rprint(status_line)
    rprint(dict(r.headers))
chmod +x fetch_headers.py
./fetch_headers.py https://httpbin.org/get
# or:
uv run fetch_headers.py https://example.com
Output:
200 https://httpbin.org/get
{'date': 'Mon, 25 May 2026 14:30:00 GMT', 'content-type': 'application/json',
'content-length': '296', 'server': 'gunicorn/19.9.0', ...}
The shebang #!/usr/bin/env -S uv run --script makes the file directly executable on Linux/macOS. The -S flag tells env to split the rest of the line into separate arguments (without it, Linux hands "uv run --script" to env as one single argument).
Single-binary script with python -m venv (no uv)
If you can't install uv, a self-contained bash + Python script can manage its own venv on first run. Useful for ops scripts that need to work on locked-down hosts.
#!/usr/bin/env bash
# self_bootstrap.sh
# Create a private venv with the tool's dependencies on first run, then
# hand over to mytool.py (located next to this script) inside that venv.
set -euo pipefail
VENV="$HOME/.local/share/mytool/venv"
# Marker written only after pip has succeeded. Testing for the directory
# alone would treat a venv whose install failed half-way as complete and
# never retry.
READY="$VENV/.bootstrap-complete"
if [ ! -f "$READY" ]; then
    rm -rf "$VENV"
    python3 -m venv "$VENV"
    "$VENV/bin/pip" install --quiet httpx rich
    touch "$READY"
fi
exec "$VENV/bin/python" "$(dirname "$0")/mytool.py" "$@"
The Python tool itself stays in mytool.py next to the bash wrapper.
Common pitfalls
- print for everything — fine for throwaway scripts; switch to logging the moment the script has more than one mode or is run unattended. Logs go to stderr by default; print goes to stdout.
- No if __name__ == "__main__": guard — your module's top-level code runs every time it's imported. Wrap CLI entry points in if __name__ == "__main__": sys.exit(main()).
- sys.exit with non-int values — sys.exit("error message") prints the message to stderr and exits 1; this is fine but surprising. Prefer explicit print(..., file=sys.stderr); sys.exit(1).
- Forgetting flush=True before exit on signals — stdout is line-buffered on a TTY but block-buffered when redirected. A Ctrl-C between writes can lose buffered output. Either flush explicitly or use print(..., flush=True).
- stdin decoding — sys.stdin uses the locale's encoding. For binary input use sys.stdin.buffer; for explicit UTF-8 set PYTHONIOENCODING=utf-8 or call sys.stdin.reconfigure(encoding="utf-8").
- Hardcoded paths — Path("/tmp/foo") won't exist on Windows. Use tempfile.gettempdir() or Path.home().
- Lost exit code in pipes — use set -o pipefail in bash; otherwise python a.py | python b.py exits with b.py's code even if a.py failed.
- Coloured output piped into less — pass less -R (raw ANSI) or strip codes via the isatty() guard above.
- Subcommand dispatch by if/elif — works but doesn't scale. Use subparsers + set_defaults(func=…) (see argparse) or typer.
- Loading .env after parsing args — load it first so env-var-backed defaults pick up the values. Order is: dotenv.load_dotenv() → argparse.parse_args().
Real-world recipes
Bulk-rename files matching a pattern
A typical ops script: walk a directory, apply a regex to each file name, preview, then rename with --apply.
#!/usr/bin/env python3
# bulk_rename.py
import argparse
import re
import sys
from pathlib import Path
parser = argparse.ArgumentParser()
parser.add_argument("root", type=Path)
parser.add_argument("--from", dest="pattern", required=True, help="Regex to match")
parser.add_argument("--to", dest="replacement", required=True, help="Replacement")
parser.add_argument("--apply", action="store_true", help="Without this, only preview")
args = parser.parse_args()
rx = re.compile(args.pattern)
renames = []
for p in args.root.rglob("*"):
if p.is_file() and rx.search(p.name):
new = p.with_name(rx.sub(args.replacement, p.name))
if new != p:
renames.append((p, new))
for old, new in renames:
print(f"{old} → {new}")
if not args.apply:
print(f"\n(preview) {len(renames)} files would be renamed; pass --apply to do it")
sys.exit(0)
for old, new in renames:
old.rename(new)
print(f"renamed {len(renames)} files")
python bulk_rename.py ./photos --from '^IMG_' --to 'vacation_'
python bulk_rename.py ./photos --from '^IMG_' --to 'vacation_' --apply
Output:
photos/IMG_0001.jpg → photos/vacation_0001.jpg
photos/IMG_0002.jpg → photos/vacation_0002.jpg
(preview) 2 files would be renamed; pass --apply to do it
Tail a log file and alert on patterns
Stream a growing log with subprocess's tail -F, look for patterns, and ring the terminal bell on hits. A 30-line on-call monitor.
#!/usr/bin/env python3
# log_alert.py
"""Follow a log file with ``tail -F``, colour matching lines, ring on alerts."""
import re
import subprocess
import sys

# Checked in this order; the first pattern that matches decides the colour.
PATTERNS = {
    re.compile(r"\bERROR\b"): "\033[31m",  # red
    re.compile(r"\bWARN(ING)?\b"): "\033[33m",  # yellow
    re.compile(r"\bOOM\b"): "\033[35m",  # magenta
}
END = "\033[0m"
# Lines that also ring the terminal bell; compiled once, not once per line.
ALERT_RE = re.compile(r"\bERROR\b|\bOOM\b")


def colorize(line: str) -> str:
    """Return line wrapped in the colour of the first matching pattern.

    A line that matches no pattern is returned unchanged.
    """
    for rx, code in PATTERNS.items():
        if rx.search(line):
            return f"{code}{line}{END}"
    return line


def main(argv: list[str]) -> int:
    """Stream argv[0] (default /var/log/app.log) until interrupted."""
    path = argv[0] if argv else "/var/log/app.log"
    with subprocess.Popen(
        ["tail", "-F", path],
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        text=True,
        bufsize=1,
    ) as proc:
        try:
            for line in proc.stdout:
                sys.stdout.write(colorize(line))
                if ALERT_RE.search(line):
                    sys.stdout.write("\a")  # terminal bell
                sys.stdout.flush()
        except KeyboardInterrupt:
            proc.terminate()
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv[1:]))
python log_alert.py /var/log/app.log
Output:
2026-05-25 14:55:00 INFO request 200 /api/users
2026-05-25 14:55:01 ERROR connection refused to db.internal
2026-05-25 14:55:02 WARN retrying in 5s
Parallel HTTP downloader with progress
Combine httpx's async client with tqdm for a 30-line concurrent downloader. Cross-references asyncio.
#!/usr/bin/env python3
# parallel_dl.py
"""Download several URLs concurrently, showing one progress bar per file."""
import asyncio
import sys
from pathlib import Path, PurePosixPath
from urllib.parse import urlsplit

import httpx
from tqdm.asyncio import tqdm


def local_names(urls: list[str]) -> list[str]:
    """Return one distinct file name per URL, in the same order.

    The name is the last segment of the URL's path, so the query string
    ("?bytes=…") never ends up in a file name; "download" is used when the
    path has no final segment. Because URLs that differ only in their
    query would then share a name, repeats get ".1", ".2", … appended.
    """
    taken: set[str] = set()
    names = []
    for url in urls:
        base = PurePosixPath(urlsplit(url).path).name or "download"
        name, n = base, 0
        while name in taken:
            n += 1
            name = f"{base}.{n}"
        taken.add(name)
        names.append(name)
    return names


async def download(client: httpx.AsyncClient, url: str, dst: Path):
    """Stream url into dst, advancing a progress bar chunk by chunk."""
    async with client.stream("GET", url) as r:
        r.raise_for_status()
        # A missing Content-Length header is passed to tqdm as total=0.
        total = int(r.headers.get("content-length", 0))
        with dst.open("wb") as f, tqdm(
            total=total, unit="B", unit_scale=True, desc=dst.name, leave=False
        ) as bar:
            async for chunk in r.aiter_bytes():
                f.write(chunk)
                bar.update(len(chunk))


async def main(urls: list[str], out_dir: Path):
    """Download every URL into out_dir using one shared client."""
    out_dir.mkdir(parents=True, exist_ok=True)
    names = local_names(urls)
    async with httpx.AsyncClient(timeout=30) as client:
        await asyncio.gather(*(
            download(client, url, out_dir / name) for url, name in zip(urls, names)
        ))


if __name__ == "__main__":
    asyncio.run(main(sys.argv[1:], Path("downloads")))
python parallel_dl.py \
'https://speed.cloudflare.com/__down?bytes=1000000' \
'https://speed.cloudflare.com/__down?bytes=2000000'
Output:
__down: 100%|██████| 1.00M/1.00M [00:00<00:00, 4.21MB/s]
__down: 100%|██████| 2.00M/2.00M [00:00<00:00, 5.13MB/s]
CSV → SQLite ingest with --dry-run
Read a CSV, create a SQLite table with one TEXT column per header field, and bulk-insert the rows. Useful for ad-hoc analysis where loading the file into polars or pandas is overkill.
#!/usr/bin/env python3
# csv_to_sqlite.py
import argparse
import csv
import sqlite3
from pathlib import Path
parser = argparse.ArgumentParser()
parser.add_argument("csv", type=Path)
parser.add_argument("db", type=Path)
parser.add_argument("--table", required=True)
parser.add_argument("--dry-run", action="store_true")
args = parser.parse_args()
with args.csv.open(newline="") as f:
reader = csv.reader(f)
cols = next(reader)
rows = list(reader)
quoted_cols = ", ".join(f'"{c}" TEXT' for c in cols)
ddl = f'CREATE TABLE IF NOT EXISTS "{args.table}" ({quoted_cols})'
placeholders = ",".join("?" * len(cols))
dml = f'INSERT INTO "{args.table}" VALUES ({placeholders})'
if args.dry_run:
print(ddl)
print(f"-- would insert {len(rows)} rows")
else:
with sqlite3.connect(args.db) as conn:
conn.execute(ddl)
conn.executemany(dml, rows)
print(f"inserted {len(rows)} rows into {args.db}:{args.table}")
printf "name,age\nAlice,30\nBob,25\n" > people.csv
python csv_to_sqlite.py people.csv people.db --table people
sqlite3 people.db 'SELECT * FROM people'
Output:
inserted 2 rows into people.db:people
Alice|30
Bob|25
Cross-tool pipeline — Python wrapping a shell command
A real-world Popen wrapper from subprocess: run git log --pretty, parse each line into a dict, and emit JSON. The pattern of "shell out, parse, re-emit structured data" is the core of every Python ops script.
#!/usr/bin/env python3
# git_log_json.py
"""Emit the latest 50 commits of the current repository as a JSON array."""
import json
import subprocess
import sys

# Tab-separated (%x09) fields: full hash, author name, author date, subject.
fmt = "%H%x09%an%x09%ad%x09%s"
command = ["git", "log", f"--pretty=format:{fmt}", "--date=iso-strict", "-50"]
completed = subprocess.run(command, capture_output=True, text=True, check=True)
out = completed.stdout


def to_record(line: str) -> dict:
    """Turn one tab-separated log line into a dict with a 10-char sha."""
    # Split at most three times so tabs inside the subject stay in msg.
    sha, author, date, msg = line.split("\t", 3)
    return {"sha": sha[:10], "author": author, "date": date, "msg": msg}


records = [to_record(line) for line in out.splitlines()]
json.dump(records, sys.stdout, indent=2)
sys.stdout.write("\n")
python git_log_json.py | jq '.[0]'
Output:
{
"sha": "7c4f3a2b9d",
"author": "Alice Dev",
"date": "2026-05-25T14:30:00-05:00",
"msg": "Add async retry helper"
}
See also
- argparse — full reference for the stdlib parser used in nearly every snippet here
- click — decorator-based alternative with prompts and confirms built in
- typer — type-hint-based CLI framework (FastAPI for the terminal)
- rich — terminal styling, tables, progress, tracebacks
- tqdm — progress bars for any iterable
- subprocess — running other commands from inside Python
- itertools-functools — building blocks that compose with these snippets
- pathlib — modern path manipulation used throughout