import pandas as pd
import json
import os
import re
from openpyxl import load_workbook

excel_path = r'f:\laravel\job\abality - Copy\public\ABILITY_PRICE_LIST_2026-1.xlsx'
output_img_dir = r'f:\laravel\job\abality - Copy\public\Images\extracted'
os.makedirs(output_img_dir, exist_ok=True)

wb = load_workbook(excel_path, data_only=True)
sheets = wb.sheetnames

all_data = []

# Map sheet names to brand names if needed
brand_map = {
    'YESPAN Encl.': 'YESPAN',
    'TENSE': 'TENSE',
    'ELMEX': 'ELMEX',
    'POWER SUPPLY': 'MEAN WELL', # Based on preview
    'DATAKOM': 'DATAKOM',
    'PANASSEMBLY': 'PAN ASSEMBLY',
    'ENCLOSURE': 'ENCLOSURE',
    'ACCESSORIES': 'ACCESSORIES',
    'PRESSURE TRANSMITTER': 'ABILITY' # Based on preview
}

def clean_val(val):
    if pd.isna(val): return None
    if isinstance(val, str): return val.strip()
    return val

for sheet_name in sheets:
    ws = wb[sheet_name]
    
    # Extract images for this sheet
    sheet_images = {}
    if hasattr(ws, '_images'):
        for i, img in enumerate(ws._images):
            try:
                row = img.anchor._from.row
                col = img.anchor._from.col
                img_filename = f"{sheet_name.replace(' ', '_')}_{row}_{col}.png"
                img_path = os.path.join(output_img_dir, img_filename)
                
                with open(img_path, "wb") as f:
                    f.write(img.ref.read())
                
                # Use (row, col) as key. Note: row/col are 0-indexed in openpyxl
                sheet_images[(row, col)] = img_filename
            except Exception as e:
                print(f"Error extracting image in {sheet_name}: {e}")

    # Read sheet data
    # Find headers. 
    # For YESPAN Encl., headers are row 1. For others, row 0.
    header_row = 1 if sheet_name == 'YESPAN Encl.' else 0
    df = pd.read_excel(excel_path, sheet_name=sheet_name, header=header_row, engine='openpyxl')
    
    # Standardize columns
    col_map = {}
    for col in df.columns:
        c = str(col).lower()
        if 'part' in c or 'code' in c or 'item' in c:
            col_map[col] = 'sku'
        elif 'description' in c:
            col_map[col] = 'description'
        elif 'price' in c or 'p.l' in c:
            col_map[col] = 'price'
    
    if 'sku' not in col_map.values():
        # Fallback: first column is SKU if not found
        col_map[df.columns[0]] = 'sku'
    
    # Filter and rename
    df = df.rename(columns=col_map)
    df = df[['sku', 'description', 'price']] if 'price' in col_map.values() else df[['sku', 'description']]
    if 'price' not in df.columns: df['price'] = 0
    
    brand = brand_map.get(sheet_name, sheet_name)
    
    for idx, row in df.iterrows():
        sku = clean_val(row['sku'])
        desc = clean_val(row['description'])
        price = clean_val(row['price'])
        
        if not sku or pd.isna(sku): continue
        if sku == 'Part No.' or sku == 'Item Code' or sku == 'Product Code': continue
        
        # Clean price (some have "SR " prefix)
        if isinstance(price, str):
            price = re.sub(r'[^\d.]', '', price)
            try: price = float(price)
            except: price = 0
            
        # Match image: Check ±1 row, then fallback to last seen image in the column
        excel_row = idx + header_row + 1
        image_file = None
        for r in range(excel_row - 1, excel_row + 2):
            for c in range(20): 
                if (r, c) in sheet_images:
                    image_file = sheet_images[(r, c)]
                    break
            if image_file: break
            
        # Fallback: find nearest image ABOVE in ANY column (for sheets like YESPAN)
        if not image_file:
            for r in range(excel_row - 1, -1, -1):
                for c in range(20):
                    if (r, c) in sheet_images:
                        image_file = sheet_images[(r, c)]
                        break
                if image_file: break
        
        # Create Name: [Brand] + [First 3-4 words of description]
        short_desc = ""
        if desc:
            words = desc.split()
            short_desc = " ".join(words[:5])
        
        product_name = f"{brand} {short_desc}".strip()
        
        all_data.append({
            "brand": brand,
            "sku": sku,
            "name": product_name,
            "description": desc,
            "price": price,
            "image": image_file
        })

with open(r'f:\laravel\job\abality - Copy\scratch\final_extracted_data.json', 'w', encoding='utf-8') as f:
    json.dump(all_data, f, ensure_ascii=False, indent=4)

print(f"Extraction complete. Total products: {len(all_data)}")
