#!/usr/bin/env python3
r"""
Split a PayPal "Activity Download" CSV into:
1) GBP purchases/refunds (excluding funding and FX rows)
2) USD purchases/refunds (excluding FX rows)
3) FX reference table (GBP->USD conversions for USD purchases)
Works with the PayPal CSV format that has columns like:
Date, Time, Time Zone, Description, Currency, Gross, Fee, Net, Balance, Transaction ID, Reference Txn ID, Name, ...
Usage (Windows):
py -3 paypal_split_by_currency.py "C:\path\PayPal.CSV"
Outputs files next to the input CSV by default.
"""
import argparse
import csv
from collections import Counter
from pathlib import Path
EXCLUDE_GBP_DESCRIPTIONS = {
"Bank Deposit to PP Account",
"User Initiated Withdrawal",
"General Currency Conversion",
}
EXCLUDE_USD_DESCRIPTIONS = {
"General Currency Conversion",
}
USD_PAYMENT_DESCRIPTIONS = {
"Pre-approved Payment Bill User Payment",
"Express Checkout Payment",
"General Payment",
}
def parse_float(s: str) -> float:
s = (s or "").strip()
if s == "":
return 0.0
return float(s)
def read_rows(csv_path: Path):
with csv_path.open("r", encoding="utf-8-sig", newline="") as f:
reader = csv.DictReader(f)
rows = list(reader)
if not rows:
raise SystemExit("CSV appears to be empty.")
return reader.fieldnames, rows
def write_rows(out_path: Path, fieldnames, rows):
out_path.parent.mkdir(parents=True, exist_ok=True)
with out_path.open("w", encoding="utf-8", newline="") as f:
w = csv.DictWriter(f, fieldnames=fieldnames)
w.writeheader()
w.writerows(rows)
def main():
ap = argparse.ArgumentParser()
ap.add_argument("csv", type=Path, help="PayPal activity CSV file")
ap.add_argument("--outdir", type=Path, default=None, help="Output directory (default: same folder as input)")
ap.add_argument("--prefix", type=str, default=None, help="Filename prefix (default: input filename stem)")
args = ap.parse_args()
csv_path: Path = args.csv
outdir: Path = args.outdir if args.outdir else csv_path.parent
prefix: str = args.prefix if args.prefix else csv_path.stem
fieldnames, rows = read_rows(csv_path)
# Basic sanity check for columns we rely on
required = {"Currency", "Description", "Net", "Transaction ID", "Reference Txn ID", "Date", "Time"}
missing_cols = required - set(fieldnames or [])
if missing_cols:
raise SystemExit(f"Missing expected columns: {', '.join(sorted(missing_cols))}")
# Split for import
gbp_rows = [
r for r in rows
if (r.get("Currency") == "GBP") and (r.get("Description") not in EXCLUDE_GBP_DESCRIPTIONS)
]
usd_rows = [
r for r in rows
if (r.get("Currency") == "USD") and (r.get("Description") not in EXCLUDE_USD_DESCRIPTIONS)
]
# Build quick lookup for FX components keyed by purchase transaction id
# For a USD purchase with Transaction ID = PID, PayPal records:
# GBP "Bank Deposit to PP Account" Reference Txn ID == PID
# GBP "General Currency Conversion" Reference Txn ID == PID (Net negative)
# USD "General Currency Conversion" Reference Txn ID == PID (Net positive)
fx_lookup = {}
for r in rows:
ref = (r.get("Reference Txn ID") or "").strip()
if not ref:
continue
fx_lookup.setdefault(ref, []).append(r)
fx_ref_rows = []
fx_missing = 0
for r in rows:
if r.get("Currency") != "USD":
continue
if r.get("Description") not in USD_PAYMENT_DESCRIPTIONS:
continue
pid = (r.get("Transaction ID") or "").strip()
if not pid:
continue
bundle = fx_lookup.get(pid, [])
gbp_deposit = next((x for x in bundle if x.get("Currency") == "GBP" and x.get("Description") == "Bank Deposit to PP Account"), None)
gbp_conv = next((x for x in bundle if x.get("Currency") == "GBP" and x.get("Description") == "General Currency Conversion"), None)
usd_conv = next((x for x in bundle if x.get("Currency") == "USD" and x.get("Description") == "General Currency Conversion"), None)
if not (gbp_deposit and gbp_conv and usd_conv):
fx_missing += 1
continue
gbp_topup = parse_float(gbp_deposit.get("Net"))
gbp_converted = abs(parse_float(gbp_conv.get("Net")))
usd_received = parse_float(usd_conv.get("Net"))
usd_spent = abs(parse_float(r.get("Net")))
rate = (usd_received / gbp_converted) if gbp_converted else 0.0
merchant = (r.get("Name") or "").strip() or "(blank name)"
fx_ref_rows.append({
"Date": r.get("Date"),
"Time": r.get("Time"),
"Merchant": merchant,
"GBP topup (bank->PayPal GBP)": f"{gbp_topup:.2f}",
"GBP converted (PayPal GBP->USD)": f"{gbp_converted:.2f}",
"USD received (PayPal USD)": f"{usd_received:.2f}",
"USD spent at merchant": f"{usd_spent:.2f}",
"Implied rate (USD per GBP)": f"{rate:.6f}",
"Purchase TxID": pid,
})
# Output paths
gbp_out = outdir / f"{prefix}_GBP_purchases.csv"
usd_out = outdir / f"{prefix}_USD_purchases.csv"
fx_out = outdir / f"{prefix}_FX_reference.csv"
write_rows(gbp_out, fieldnames, gbp_rows)
write_rows(usd_out, fieldnames, usd_rows)
fx_fields = ["Date","Time","Merchant","GBP topup (bank->PayPal GBP)","GBP converted (PayPal GBP->USD)","USD received (PayPal USD)","USD spent at merchant","Implied rate (USD per GBP)","Purchase TxID"]
write_rows(fx_out, fx_fields, fx_ref_rows)
# Summary
ccy_counts = Counter(r.get("Currency") for r in rows)
desc_counts = Counter(r.get("Description") for r in rows)
print("Input:", csv_path)
print("Rows:", len(rows), "| Currencies:", dict(ccy_counts))
print("Wrote:")
print(" ", gbp_out, f"({len(gbp_rows)} rows)")
print(" ", usd_out, f"({len(usd_rows)} rows)")
print(" ", fx_out, f"({len(fx_ref_rows)} FX rows; {fx_missing} missing bundles skipped)")
print("Top descriptions:", ", ".join([f"{k}={v}" for k,v in desc_counts.most_common(6)]))
if __name__ == "__main__":
main()