/**
 * 정적 매칭 Go/No-Go 평가 스크립트
 *
 * InsuWiki 기존 문서 50건을 대상으로 정적 매칭을 실행하고
 * precision/recall을 측정합니다.
 *
 * 사용법:
 *   npx ts-node scripts/evaluate-static-matching.ts
 *
 * Phase 1-A 품질 검증 스크립트
 * 생성일: 2026-03-22
 */

import * as admin from 'firebase-admin';
import * as path from 'path';
import * as fs from 'fs';

// ============================================================
// Firebase Admin 초기화
// ============================================================
// Initialize the Firebase Admin SDK exactly once.
// Preference order:
//   1. a local service-account key file next to the repo (explicit project id), or
//   2. the SDK's default credential lookup when the key file is missing or unusable.
if (!admin.apps.length) {
    const localKeyPath = path.resolve(__dirname, '../temp.j2h/insuwiki-j2h-902be7d0b6f5.json');
    if (fs.existsSync(localKeyPath)) {
        try {
            const serviceAccount = JSON.parse(fs.readFileSync(localKeyPath, 'utf8'));
            admin.initializeApp({
                credential: admin.credential.cert(serviceAccount),
                projectId: 'insuwiki-j2h',
            });
        } catch (err) {
            // Reading, parsing or building the credential failed: fall back to the
            // default credentials, but surface the underlying error so the reason
            // for the fallback is visible (same style as the other catch blocks in
            // this script, which log the caught error).
            admin.initializeApp();
            console.warn('⚠️ 서비스 계정 키 파싱 실패 — 기본 인증 사용', err);
        }
    } else {
        admin.initializeApp();
        console.log('ℹ️ 서비스 계정 키 없음 — 기본 인증 사용 (gcloud auth)');
    }
}

// Firestore handle shared by the rest of the script.
const db = admin.firestore();

// ============================================================
// 타입 정의 (functions/src/staticMatching.ts에서 복사)
// ============================================================

/**
 * How a link between a document and a term was produced.
 * findMatchingTerms in this script only ever emits 'static'.
 */
type LinkMethod = 'manual' | 'static' | 'embedding' | 'semantic';

/**
 * A glossary term as stored in the `insurance_terms` collection.
 * This script only reads `id`, `term` and `commonAliases`; the remaining
 * fields are carried along from the stored document but never inspected here.
 */
interface InsuranceTerm {
    // Identifier used to skip already-linked terms and to join against manual-link targets.
    id: string;
    // Canonical term text; compared case-insensitively against document content.
    term: string;
    definition: string;
    // Alternative spellings; each one is compared case-insensitively against document content.
    commonAliases: string[];
    icdCodes?: string[];
    companyId: string;
    productId: string;
    pageNumber: number;
    verified: boolean;
    createdAt: FirebaseFirestore.Timestamp;
}

/**
 * One term detected in a document by findMatchingTerms.
 */
interface MatchResult {
    // `id` of the matched InsuranceTerm.
    termId: string;
    // `term` text of the matched InsuranceTerm.
    term: string;
    method: LinkMethod;
    // Encodes which check matched: 100 exact, 90 alias, 85 space-removed, 70 substring.
    confidence: number;
    // Human-readable reason; for alias matches verifyMatchResult parses the alias back out of it.
    explanation: string;
}

/**
 * Lookup from a lowercased term to the replacement text that findMatchingTerms
 * uses for its whitespace-insensitive comparison.
 */
interface NormalizeMap {
    [key: string]: string;
}

// ============================================================
// findMatchingTerms 순수 함수 (functions/src/staticMatching.ts에서 복사)
// ============================================================

/**
 * Finds the insurance terms mentioned in a document using static string checks only.
 *
 * Every term is run through the checks below in order and contributes at most one
 * result, taken from the first check that succeeds:
 *   1. exact         - lowercased content contains the lowercased term            -> 100
 *   2. alias         - lowercased content contains one of the term's aliases      -> 90
 *   3. space-removed - content and (normalized) term compared without whitespace  -> 85
 *   4. substring     - a content word of 2+ chars is a prefix of a 3+ char term   -> 70
 *
 * Blank terms and blank aliases are ignored: `String.prototype.includes('')` is
 * always true, so they would otherwise match every document. Terms whose
 * `commonAliases` is missing are treated as having no aliases.
 *
 * NOTE(review): this function mirrors functions/src/staticMatching.ts; the same
 * guards should be ported there so the evaluation keeps measuring production behavior.
 *
 * @param content         Document body to scan.
 * @param terms           Candidate terms.
 * @param existingLinkIds Term ids to skip (already linked from the document).
 * @param normalizeMap    Optional lookup from lowercased term to replacement text;
 *                        only used by the space-removed check.
 * @returns One MatchResult per matched term, in the order the terms were given.
 */
function findMatchingTerms(
    content: string,
    terms: InsuranceTerm[],
    existingLinkIds: string[],
    normalizeMap: NormalizeMap = {}
): MatchResult[] {
    const results: MatchResult[] = [];
    const contentLower = content.toLowerCase();
    const contentNoSpace = contentLower.replace(/\s/g, '');
    // Loop-invariant: tokenize the content once instead of once per term.
    // Words shorter than 2 characters can never satisfy the substring check.
    const contentWords = Array.from(
        new Set(
            contentLower
                .split(/[\s.,!?;:()[\]{}'"]/)
                .filter(word => word.length >= 2)
        )
    );
    const seenTermIds = new Set<string>();
    const excludeIds = new Set(existingLinkIds);

    // Records a match and remembers the id so a duplicate entry is not reported twice.
    const record = (term: InsuranceTerm, confidence: number, explanation: string): void => {
        results.push({
            termId: term.id,
            term: term.term,
            method: 'static',
            confidence,
            explanation,
        });
        seenTermIds.add(term.id);
    };

    for (const term of terms) {
        if (excludeIds.has(term.id) || seenTermIds.has(term.id)) continue;

        const termLower = typeof term.term === 'string' ? term.term.toLowerCase() : '';

        // 1. exact match -> confidence 100 (blank terms never match)
        if (termLower.trim().length > 0 && contentLower.includes(termLower)) {
            record(term, 100, `exact match: "${term.term}"`);
            continue;
        }

        // 2. alias match -> confidence 90 (first alias found in the content wins)
        const aliases = Array.isArray(term.commonAliases) ? term.commonAliases : [];
        const matchedAlias = aliases.find(alias => {
            const aliasLower = typeof alias === 'string' ? alias.toLowerCase() : '';
            return aliasLower.trim().length > 0 && contentLower.includes(aliasLower);
        });
        if (matchedAlias !== undefined) {
            record(term, 90, `alias match: "${matchedAlias}" → "${term.term}"`);
            continue;
        }

        // Apply normalizeMap. Only own string entries count, so a term such as
        // "constructor" cannot pick up an inherited Object.prototype member.
        const mapped = Object.prototype.hasOwnProperty.call(normalizeMap, termLower)
            ? normalizeMap[termLower]
            : undefined;
        const normalizedTermLower =
            typeof mapped === 'string' && mapped.length > 0 ? mapped.toLowerCase() : termLower;
        const normalizedNoSpace = normalizedTermLower.replace(/\s/g, '');

        // 3. whitespace-insensitive match -> confidence 85
        if (normalizedNoSpace.length > 0 && contentNoSpace.includes(normalizedNoSpace)) {
            record(term, 85, `공백 제거 match: "${term.term}"`);
            continue;
        }

        // 4. substring check -> confidence 70 (terms of 3+ characters only):
        //    some content word is a prefix of the term.
        if (termLower.length >= 3 && contentWords.some(word => termLower.startsWith(word))) {
            record(term, 70, `substring match: "${term.term}"`);
        }
    }

    return results;
}

// ============================================================
// 평가 전용 타입
// ============================================================

/**
 * A MatchResult annotated with the outcome of verifyMatchResult.
 */
interface VerifiedMatchResult extends MatchResult {
    verified: boolean;       // true = counted as true positive, false = counted as false positive
    verifyReason: string;    // human-readable explanation of the verification outcome
}

/**
 * Per-document evaluation record; one entry is produced for each evaluated document.
 */
interface DocumentEvalResult {
    docId: string;
    title: string;
    // Total number of matches reported for the document.
    matchCount: number;
    // Number of matches per check (confidence 100 / 90 / 85 / 70 respectively).
    exactCount: number;
    aliasCount: number;
    spaceCount: number;
    substringCount: number;
    // Matches whose `verified` flag is true / false.
    truePositives: number;
    falsePositives: number;
    verifiedMatches: VerifiedMatchResult[];
    goldStandard: string[];       // manual link target termIds (gold standard)
    matchedGold: number;          // number of gold-standard entries that static matching found
}

/**
 * Aggregate result of one evaluation run; this object is what gets written to
 * evaluation-results.json.
 */
interface EvaluationSummary {
    // ISO-8601 timestamp taken when the run started.
    evaluatedAt: string;
    totalDocuments: number;
    totalTerms: number;
    totalMatches: number;
    truePositives: number;
    falsePositives: number;
    // truePositives / totalMatches as a percentage rounded to two decimals (0 when there are no matches).
    precision: number;
    // Numerator and denominator of `recall`.
    recallMatchedGold: number;
    recallTotalGold: number;
    // recallMatchedGold / recallTotalGold as a percentage rounded to two decimals (0 when there is no gold standard).
    recall: number;
    // Same precision figure, broken down by the confidence value of the match.
    confidenceBreakdown: {
        [confidence: number]: {
            total: number;
            truePositives: number;
            precision: number;
        };
    };
    documentResults: DocumentEvalResult[];
}

// ============================================================
// 매칭 결과 검증 함수
// ============================================================

/**
 * Re-checks a single match produced by findMatchingTerms against the document text.
 *
 * The check that is repeated is selected by `result.confidence`
 * (100 exact, 90 alias, 85 space-removed, 70 substring). Because it re-applies the
 * same string predicates the matcher used, it detects inconsistencies between the
 * reported match and the content; it does not judge semantic relevance.
 *
 * @param result       Match to verify.
 * @param content      Document body the match was found in.
 * @param terms        Term list used to resolve `result.termId`.
 * @param normalizeMap Optional normalization lookup. Pass the same map that was given
 *                     to findMatchingTerms: confidence-85 matches are made on the
 *                     normalized term, so verifying them against the raw term would
 *                     wrongly flag every normalized match as a false positive.
 *                     When omitted, the raw term is used.
 * @returns The match extended with `verified` and a human-readable `verifyReason`.
 */
function verifyMatchResult(
    result: MatchResult,
    content: string,
    terms: InsuranceTerm[],
    normalizeMap: NormalizeMap = {}
): VerifiedMatchResult {
    const contentLower = content.toLowerCase();
    const contentNoSpace = contentLower.replace(/\s/g, '');
    const term = terms.find(t => t.id === result.termId);

    if (!term) {
        return {
            ...result,
            verified: false,
            verifyReason: '용어 정보 없음',
        };
    }

    const termLower = term.term.toLowerCase();

    switch (result.confidence) {
        case 100: {
            // exact: the content must contain the term
            const ok = contentLower.includes(termLower);
            return {
                ...result,
                verified: ok,
                verifyReason: ok
                    ? `content에 "${term.term}" 포함 확인`
                    : `content에 "${term.term}" 미포함 (검증 실패)`,
            };
        }
        case 90: {
            // alias: the content must contain the alias that was reported.
            // The alias is recovered from the explanation: `alias match: "ALIAS" → "TERM"`
            const aliasMatch = result.explanation.match(/alias match: "([^"]+)"/);
            const matchedAlias = aliasMatch ? aliasMatch[1] : '';
            if (matchedAlias) {
                const ok = contentLower.includes(matchedAlias.toLowerCase());
                return {
                    ...result,
                    verified: ok,
                    verifyReason: ok
                        ? `content에 alias "${matchedAlias}" 포함 확인`
                        : `content에 alias "${matchedAlias}" 미포함 (검증 실패)`,
                };
            }
            // Alias could not be extracted: fall back to checking every alias.
            // Missing alias lists are treated as empty and blank aliases are ignored
            // (an empty string is contained in every content).
            const aliases = Array.isArray(term.commonAliases) ? term.commonAliases : [];
            const anyAliasOk = aliases.some(
                a => typeof a === 'string' && a.trim().length > 0 && contentLower.includes(a.toLowerCase())
            );
            return {
                ...result,
                verified: anyAliasOk,
                verifyReason: anyAliasOk
                    ? 'commonAliases 중 하나 포함 확인'
                    : 'commonAliases 모두 미포함 (검증 실패)',
            };
        }
        case 85: {
            // space-removed: compare the same normalized form the matcher compared.
            // Only own string entries of normalizeMap are honored.
            const mapped = Object.prototype.hasOwnProperty.call(normalizeMap, termLower)
                ? normalizeMap[termLower]
                : undefined;
            const normalizedLower =
                typeof mapped === 'string' && mapped.length > 0 ? mapped.toLowerCase() : termLower;
            const termNoSpace = normalizedLower.replace(/\s/g, '');
            const ok = termNoSpace.length > 0 && contentNoSpace.includes(termNoSpace);
            return {
                ...result,
                verified: ok,
                verifyReason: ok
                    ? `공백 제거 후 content에 "${term.term}" 포함 확인`
                    : `공백 제거 후에도 "${term.term}" 미포함 (검증 실패)`,
            };
        }
        case 70: {
            // substring: verification is provisional (false positives are possible).
            // Re-check that some content word of 2+ characters is a prefix of the term.
            const contentWords = contentLower.split(/[\s.,!?;:()[\]{}'"]/);
            const substringOk = contentWords.some(
                word => word.length >= 2 && termLower.startsWith(word)
            );
            return {
                ...result,
                verified: substringOk,
                verifyReason: substringOk
                    ? `substring 매칭 확인 (검증 보류 - false positive 가능)`
                    : `substring 매칭 재검증 실패`,
            };
        }
        default:
            return {
                ...result,
                verified: false,
                verifyReason: `알 수 없는 confidence 값: ${result.confidence}`,
            };
    }
}

// ============================================================
// Main evaluation
// ============================================================

/** Ratio as a percentage rounded to two decimals; 0 when the denominator is not positive. */
function toPercent(numerator: number, denominator: number): number {
    return denominator > 0 ? Math.round((numerator / denominator) * 10000) / 100 : 0;
}

/**
 * Runs the whole Go/No-Go evaluation:
 *   1. loads every document of `insurance_terms`,
 *   2. loads `config/normalizeMap` (empty map on failure),
 *   3. loads up to 50 of the most recently created `documents` that have content,
 *   4. loads manual links whose target is a known term (gold standard for recall),
 *   5. runs findMatchingTerms + verifyMatchResult per document,
 *   6-8. aggregates precision / recall, prints them and prints the verdict,
 *   9. writes the full summary to `evaluation-results.json` next to this script.
 *
 * Exits the process with code 1 when terms or documents cannot be loaded or when
 * no document with content is found. A failure to load normalizeMap or links only
 * produces a warning.
 */
async function runEvaluation(): Promise<void> {
    const SAMPLE_SIZE = 50;   // number of documents to evaluate
    const FETCH_LIMIT = 200;  // over-fetch so documents without content can be skipped

    const evaluatedAt = new Date().toISOString();

    console.log('\n=== InsuWiki 정적 매칭 Go/No-Go 평가 ===');
    console.log(`평가 일시: ${evaluatedAt}`);
    console.log('');

    // -- 1. Load all insurance_terms ---------------------------------
    console.log('insurance_terms 컬렉션 로드 중...');
    const terms: InsuranceTerm[] = [];
    try {
        const termsSnap = await db.collection('insurance_terms').get();
        termsSnap.forEach(doc => {
            terms.push({ id: doc.id, ...doc.data() } as InsuranceTerm);
        });
        console.log(`  → ${terms.length}개 용어 로드 완료`);
    } catch (err) {
        console.error('❌ insurance_terms 로드 실패:', err);
        process.exit(1);
    }

    // -- 2. Load config/normalizeMap ---------------------------------
    console.log('config/normalizeMap 로드 중...');
    let normalizeMap: NormalizeMap = {};
    try {
        const nmDoc = await db.collection('config').doc('normalizeMap').get();
        normalizeMap = nmDoc.exists ? (nmDoc.data() as NormalizeMap) : {};
        console.log(`  → ${Object.keys(normalizeMap).length}개 정규화 규칙 로드 완료`);
    } catch (err) {
        console.warn('⚠️ normalizeMap 로드 실패, 빈 맵 사용:', err);
    }

    // -- 3. Load the newest documents that have content --------------
    console.log('documents 컬렉션 로드 중 (최신 50건, content 있는 것)...');
    const documents: Array<{ id: string; title: string; content: string; outgoingLinkIds: string[] }> = [];
    try {
        const docsSnap = await db
            .collection('documents')
            .orderBy('createdAt', 'desc')
            .limit(FETCH_LIMIT)
            .get();

        for (const doc of docsSnap.docs) {
            const data = doc.data();
            // Stored fields are untyped: anything that is not a string / array is
            // treated as absent instead of crashing later on .trim() / new Set().
            const content = typeof data.content === 'string' ? data.content : '';
            if (!content.trim()) continue;
            documents.push({
                id: doc.id,
                title: data.title || '',
                content,
                outgoingLinkIds: Array.isArray(data.outgoingLinkIds) ? data.outgoingLinkIds : [],
            });
            if (documents.length >= SAMPLE_SIZE) break;
        }
        console.log(`  → ${documents.length}개 문서 로드 완료`);
    } catch (err) {
        console.error('❌ documents 로드 실패:', err);
        process.exit(1);
    }

    if (documents.length === 0) {
        console.error('❌ 평가할 문서가 없습니다.');
        process.exit(1);
    }

    console.log(`\n평가 대상: ${documents.length}개 문서, ${terms.length}개 insurance_terms`);
    console.log('');

    // -- 4. Load manual links (gold standard) ------------------------
    console.log('links 컬렉션에서 manual 링크 로드 중...');
    const termIdSet = new Set(terms.map(t => t.id));

    // sourceDocId -> distinct target term ids. A Set is used so that a manual link
    // stored more than once does not inflate the recall numerator and denominator.
    const manualLinksByDocId = new Map<string, Set<string>>();
    try {
        const linksSnap = await db
            .collection('links')
            .where('method', '==', 'manual')
            .get();

        linksSnap.forEach(doc => {
            const data = doc.data();
            const sourceDocId: string = data.sourceDocId || '';
            const targetDocId: string = data.targetDocId || '';
            if (!sourceDocId || !targetDocId) return;

            // Only links whose target is a known insurance term belong to the gold standard.
            if (!termIdSet.has(targetDocId)) return;

            let targets = manualLinksByDocId.get(sourceDocId);
            if (!targets) {
                targets = new Set<string>();
                manualLinksByDocId.set(sourceDocId, targets);
            }
            targets.add(targetDocId);
        });
        console.log(`  → manual 링크 ${linksSnap.size}건 처리 완료`);
    } catch (err) {
        console.warn('⚠️ links 컬렉션 로드 실패, recall 측정 생략:', err);
    }

    // -- 5. Match and verify every document --------------------------
    console.log('\n--- 문서별 결과 ---');

    const documentResults: DocumentEvalResult[] = [];

    // Running totals per confidence value.
    const confidenceStats: {
        [confidence: number]: { total: number; truePositives: number };
    } = {
        100: { total: 0, truePositives: 0 },
        90: { total: 0, truePositives: 0 },
        85: { total: 0, truePositives: 0 },
        70: { total: 0, truePositives: 0 },
    };

    documents.forEach((doc, index) => {
        const rawMatches = findMatchingTerms(
            doc.content,
            terms,
            doc.outgoingLinkIds,
            normalizeMap
        );

        // The verifier receives the same normalizeMap as the matcher so that
        // confidence-85 matches are re-checked on the form that actually matched.
        const verifiedMatches: VerifiedMatchResult[] = rawMatches.map(r =>
            verifyMatchResult(r, doc.content, terms, normalizeMap)
        );

        const countAt = (confidence: number): number =>
            verifiedMatches.filter(r => r.confidence === confidence).length;
        const exactCount = countAt(100);
        const aliasCount = countAt(90);
        const spaceCount = countAt(85);
        const substringCount = countAt(70);

        const truePositives = verifiedMatches.filter(r => r.verified).length;
        const falsePositives = verifiedMatches.length - truePositives;

        for (const r of verifiedMatches) {
            const stat = confidenceStats[r.confidence];
            if (stat !== undefined) {
                stat.total++;
                if (r.verified) stat.truePositives++;
            }
        }

        // Gold standard / recall.
        // The main pass above skips every id in outgoingLinkIds, so a manually linked
        // term that is also listed there could never be counted as found.
        // NOTE(review): presumably outgoingLinkIds contains the manual link targets —
        // not verifiable from this file. Recall is therefore measured with a separate
        // pass over the gold terms only, without exclusions; for terms that were not
        // excluded this yields the same answer as the main pass.
        const goldIds = manualLinksByDocId.get(doc.id) || new Set<string>();
        const goldStandard = Array.from(goldIds);
        let matchedGold = 0;
        if (goldStandard.length > 0) {
            const goldTerms = terms.filter(t => goldIds.has(t.id));
            const foundGoldIds = new Set(
                findMatchingTerms(doc.content, goldTerms, [], normalizeMap).map(m => m.termId)
            );
            matchedGold = goldStandard.filter(id => foundGoldIds.has(id)).length;
        }

        documentResults.push({
            docId: doc.id,
            title: doc.title,
            matchCount: verifiedMatches.length,
            exactCount,
            aliasCount,
            spaceCount,
            substringCount,
            truePositives,
            falsePositives,
            verifiedMatches,
            goldStandard,
            matchedGold,
        });

        // Console report for this document.
        console.log(`\n[${index + 1}] "${doc.title}" (docId: ${doc.id})`);
        console.log(
            `  매칭: ${verifiedMatches.length}건 (exact: ${exactCount}, alias: ${aliasCount}, space: ${spaceCount}, substring: ${substringCount})`
        );
        console.log(`  검증: true_positive: ${truePositives}, false_positive: ${falsePositives}`);

        if (verifiedMatches.length > 0) {
            console.log('  상세:');
            for (const r of verifiedMatches) {
                // Substring matches are always shown as "provisional", whatever the verdict.
                const icon = r.confidence === 70
                    ? '⚠️'
                    : r.verified ? '✅' : '❌';
                const confidenceLabel =
                    r.confidence === 100 ? 'exact match' :
                    r.confidence === 90  ? 'alias match' :
                    r.confidence === 85  ? 'space-removed match' :
                                          'substring match - 검증 보류';
                console.log(`    ${icon} ${r.term} (confidence: ${r.confidence}, ${confidenceLabel})`);
            }
        }

        if (goldStandard.length > 0) {
            console.log(`  Gold standard: ${goldStandard.length}건, 발견: ${matchedGold}건`);
        }
    });

    // -- 6. Aggregate ------------------------------------------------
    const totalMatches = documentResults.reduce((s, r) => s + r.matchCount, 0);
    const totalTruePositives = documentResults.reduce((s, r) => s + r.truePositives, 0);
    const totalFalsePositives = documentResults.reduce((s, r) => s + r.falsePositives, 0);
    const precision = toPercent(totalTruePositives, totalMatches);

    // Documents without gold standard contribute 0 to both sums.
    const totalGold = documentResults.reduce((s, r) => s + r.goldStandard.length, 0);
    const totalMatchedGold = documentResults.reduce((s, r) => s + r.matchedGold, 0);
    const recall = toPercent(totalMatchedGold, totalGold);

    const confidenceBreakdown: EvaluationSummary['confidenceBreakdown'] = {};
    for (const [confStr, stat] of Object.entries(confidenceStats)) {
        confidenceBreakdown[Number(confStr)] = {
            total: stat.total,
            truePositives: stat.truePositives,
            precision: toPercent(stat.truePositives, stat.total),
        };
    }

    // -- 7. Print the summary ----------------------------------------
    console.log('\n--- 종합 결과 ---');
    console.log(`총 매칭 건수: ${totalMatches}`);
    console.log(
        `Precision: ${precision}% (${totalTruePositives} / ${totalMatches})`
    );
    if (totalGold > 0) {
        console.log(
            `Recall: ${recall}% (${totalMatchedGold} / ${totalGold}) [gold standard ${totalGold}건 기준]`
        );
    } else {
        console.log('Recall: 측정 불가 (manual 링크 gold standard 없음)');
    }

    console.log('\n--- Confidence 별 분포 ---');
    const confLabels: { [k: number]: string } = {
        100: '100 (exact)    ',
        90:  '90 (alias)     ',
        85:  '85 (space)     ',
        70:  '70 (substring) ',
    };
    for (const conf of [100, 90, 85, 70]) {
        const stat = confidenceBreakdown[conf];
        console.log(
            `  ${confLabels[conf]}: ${stat.total}건 (precision: ${stat.precision}%)`
        );
    }

    // -- 8. Verdict --------------------------------------------------
    console.log('\n--- 판정 ---');
    if (precision >= 80) {
        console.log('✅ 정확도 80%+ → Phase 1-B 진행 가능');
    } else if (precision >= 60) {
        console.log('⚠️ 정확도 60-80% → 정규화 테이블 보강 필요');
    } else {
        console.log('❌ 정확도 60% 미만 → 보고 후 대기');
    }

    // -- 9. Persist the result as JSON -------------------------------
    const summary: EvaluationSummary = {
        evaluatedAt,
        totalDocuments: documents.length,
        totalTerms: terms.length,
        totalMatches,
        truePositives: totalTruePositives,
        falsePositives: totalFalsePositives,
        precision,
        recallMatchedGold: totalMatchedGold,
        recallTotalGold: totalGold,
        recall,
        confidenceBreakdown,
        documentResults,
    };

    const outputPath = path.resolve(__dirname, 'evaluation-results.json');
    fs.writeFileSync(outputPath, JSON.stringify(summary, null, 2), 'utf8');
    console.log(`\n결과 파일 저장 완료: ${outputPath}`);
    console.log('');
}

// ============================================================
// 실행
// ============================================================
// Script entry point: run the evaluation and map its outcome to the exit code
// (0 on success, 1 after logging the error on failure).
void (async () => {
    try {
        await runEvaluation();
    } catch (err) {
        console.error('\n❌ 평가 실패:', err);
        process.exit(1);
    }
    process.exit(0);
})();