#!/usr/bin/env python3
"""Draft ALT text for ALL 2,218 images, mapped to the page/product context each image belongs to.
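Rules (checked in this order by draft(); the first match wins):
 - Logo images ("logo" in the file name or title) -> brand alt
 - Image attached to a PRODUCT  -> "<Product> – <Category> manufacturer India | Technosys"
                                   (no category: "<Product> – Technosys Equipments India")
 - Image attached to a PAGE     -> "<Page topic> – Technosys Equipments, material handling India"
 - Orphan decorative/UI image (Frame, icon, bg, separator, arrow, shape, ...) -> alt="" (correct: screen readers skip)
 - Any other orphan image       -> generic material-handling brand alt
Product and page ALT text is cut to at most 125 characters.
Nothing is applied to the site; output is a review/apply sheet (all_alt.json)."""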
import json, glob, html, re

def c(s):
    """Return *s* as plain text.

    Anything that looks like an HTML tag (``<...>``) is removed first, then
    HTML entities are decoded and surrounding whitespace is stripped.
    A falsy *s* (``None`` or ``""``) yields ``""``.
    """
    without_tags = re.sub('<[^>]+>', '', s or '')
    return html.unescape(without_tags).strip()

def _load_json(path):
    """Parse the JSON file at *path*, closing the file handle before returning."""
    # JSON interchange text is UTF-8; do not depend on the platform default encoding.
    with open(path, encoding='utf-8') as fh:
        return json.load(fh)


# Input data files, read from the current working directory.
media = _load_json('media_with_parent.json')
# Products are split across two files; both lists are concatenated.
prods = _load_json('products1.json') + _load_json('products2.json')
pages = _load_json('pages.json')
cats = _load_json('categories.json')
# category id -> display name (entities decoded); the 'uncategorized' slug is left out
cat_by_id = {
    ct['id']: html.unescape(ct['name'])
    for ct in cats
    if ct['slug'] != 'uncategorized'
}

# post_id -> context tuple: (kind, display name, category name)
ctx = {}
for p in prods:
    # First listed category id that has a known name in cat_by_id; '' when none does.
    cat = next(
        (cat_by_id[cid] for cid in (p.get('product_cat') or []) if cid in cat_by_id),
        '',
    )
    ctx[p['id']] = ('product', c(p['title']['rendered']), cat)
for p in pages:
    topic = c(p['title']['rendered'])
    # c() has already decoded HTML entities, so an "&#8211;" in the rendered
    # title reaches this point as a literal en dash. Matching only the entity
    # spelling left "... – Technosys" in the topic, which then doubled up with
    # the brand text appended to page ALTs. The entity spelling is still
    # removed as well, for titles that arrive double-escaped.
    for brand_fragment in (' with Technosys', ' – Technosys', ' &#8211; Technosys'):
        topic = topic.replace(brand_fragment, '')
    ctx[p['id']] = ('page', topic, '')

# Fragments that mark an image as decorative/UI when found in its file name or
# title. Used with .search(), case-insensitively and without word boundaries,
# so e.g. "line" also hits "pipeline".
DECOR = re.compile(
    r'(frame|icon|bg|background|separator|divider|arrow|shape|pattern|blob|dots?|line|spacer|placeholder|ellipse|vector|group-?\d|rectangle|mask)',
    flags=re.IGNORECASE,
)
# Any occurrence of "logo" (case-insensitive) marks the image as a logo.
LOGO = re.compile(r'logo', flags=re.IGNORECASE)

def draft(m):
    """Propose ALT text for one media record.

    *m* is a dict with a 'source_url' and, optionally, a 'title' (either a
    dict holding 'rendered' or a plain string) and a 'post' parent id.

    Returns an ``(alt, kind)`` pair, where *kind* names the rule that fired:
    'logo', 'product', 'page', 'decorative' (empty alt) or 'orphan-generic'.
    Rules are tried in that order, so the decorative check is only reached
    by images whose parent id is not in ``ctx``.
    """
    file_name = m['source_url'].rsplit('/', 1)[-1]
    raw_title = m.get('title')
    if isinstance(raw_title, dict):
        title = c(raw_title.get('rendered', ''))
    else:
        title = c(raw_title or '')

    # Logos win over everything else, whatever they are attached to.
    if LOGO.search(file_name) or LOGO.search(title):
        return 'Technosys Equipments – Material Handling Equipment Manufacturer India', 'logo'

    # Attached image: the ALT text is derived from the parent product/page.
    parent = ctx.get(m.get('post'))
    if parent is not None:
        kind, name, cat = parent
        if kind != 'product':
            return f"{name} – Technosys Equipments, material handling India"[:125], 'page'
        if cat:
            alt = f"{name} – {cat} manufacturer India | Technosys"
        else:
            alt = f"{name} – Technosys Equipments India"
        return alt[:125], 'product'

    # Orphan: no known parent, so only the file name and title are available.
    if DECOR.search(file_name) or DECOR.search(title):
        return '', 'decorative'   # intentional empty alt
    return 'Material handling equipment – Technosys Equipments India', 'orphan-generic'

from collections import Counter

# One review row per media item, plus a tally of which rule produced each ALT.
out = []
kinds = Counter()
for m in media:
    alt, kind = draft(m)
    kinds[kind] += 1
    out.append({
        'id': m['id'],
        'url': m['source_url'],
        'parent': m.get('post'),
        'proposed_alt': alt,
        'kind': kind,
    })

# Write through a context manager so the sheet is flushed and closed here
# instead of whenever the interpreter happens to drop the file object.
with open('all_alt.json', 'w', encoding='utf-8') as fh:
    json.dump(out, fh)

print('Total images:', len(out))
print('Breakdown by rule:', dict(kinds))
print('Non-empty ALT drafted:', sum(1 for x in out if x['proposed_alt']))
print('Intentional empty (decorative):', sum(1 for x in out if not x['proposed_alt']))
print('\nSamples:')
# First three rows overall, then the first three product rows and the first two decorative rows.
for x in out[:3]+[x for x in out if x['kind']=='product'][:3]+[x for x in out if x['kind']=='decorative'][:2]:
    print(f"  [{x['kind']}] {x['proposed_alt'] or '(empty - decorative)'}")
