import pandas as pd
import json
import os
import re

excel_path = r'f:\laravel\job\abality - Copy\public\ABILITY_PRICE_LIST_2026-1.xlsx'

# Read with header=1 (second row)
df = pd.read_excel(excel_path, header=1, engine='openpyxl')

# Clean column names
df.columns = [str(c).strip() for c in df.columns]

# Drop rows where Part No. is NaN
df = df.dropna(subset=['Part  No.'])

# Extract Brand from Part No (prefix before first dash or space)
def extract_brand(part_no):
    if not isinstance(part_no, str):
        return "Unknown"
    match = re.match(r'^([A-Z0-9]+)', part_no)
    if match:
        return match.group(1)
    return "Unknown"

df['Extracted_Brand'] = df['Part  No.'].apply(extract_brand)

# Sample the data
analysis = {
    "columns": df.columns.tolist(),
    "row_count": len(df),
    "brands": df['Extracted_Brand'].unique().tolist(),
    "sample_data": df.head(10).to_dict(orient='records')
}

with open(r'f:\laravel\job\abality - Copy\scratch\data_analysis.json', 'w', encoding='utf-8') as f:
    json.dump(analysis, f, ensure_ascii=False, indent=4)

print(f"Analysis complete. Found {len(df)} rows and {len(analysis['brands'])} brands.")
