import { test, expect, Page } from '@playwright/test';

/**
 * Sitemap-based URL validation tests
 *
 * These tests fetch URLs directly from the sitemaps and verify they resolve.
 * This catches issues where:
 * - URLs in sitemaps point to non-existent pages
 * - nginx routing doesn't serve mounted content correctly
 * - Static page generation produced broken links
 *
 * Handles nested sitemaps (sitemap indexes containing child sitemaps).
 *
 * Large sitemaps (candidates: 50K+ URLs, committees: 30K+) are fetched via
 * the fetch API instead of page.goto to avoid browser rendering timeouts
 * on multi-megabyte XML documents.
 */

// Number of random URLs to sample from each sitemap.
// Used by testSitemapUrls when a sitemap lists page URLs: only this many
// entries are visited per sitemap, so each run is a spot check.
const SAMPLE_SIZE = 5;

// Number of child sitemaps to sample when encountering a sitemap index.
// Each sampled child is fetched and processed recursively.
const CHILD_SITEMAP_SAMPLE_SIZE = 2;

// Per-page navigation timeout for entity pages (some generated pages are slow).
// Passed as the `timeout` option of page.goto, in milliseconds.
const PAGE_TIMEOUT = 15000;

/**
 * Pick up to `count` distinct elements of `array`, uniformly at random.
 *
 * The input array is never mutated. A `count` larger than the array returns
 * every element (in random order); a zero or negative `count` returns `[]`.
 *
 * @param array Source items to sample from.
 * @param count Maximum number of items to return.
 * @returns A new array holding the sampled items.
 */
function getRandomItems<T>(array: T[], count: number): T[] {
  const pool = [...array];
  // Clamp so that a negative count cannot turn into slice(0, -k).
  const take = Math.max(0, Math.min(Math.floor(count), pool.length));

  // Partial Fisher–Yates shuffle: only the first `take` slots need to be
  // randomized. Sorting with a random comparator is biased because the
  // comparator is inconsistent between calls.
  for (let i = 0; i < take; i++) {
    const j = i + Math.floor(Math.random() * (pool.length - i));
    [pool[i], pool[j]] = [pool[j], pool[i]];
  }

  return pool.slice(0, take);
}

/**
 * Extract every `<loc>` value from sitemap XML (works for both
 * `<url><loc>` and `<sitemap><loc>` entries).
 *
 * Values are normalized so they can be requested directly:
 * - surrounding whitespace/newlines inside `<loc>` are removed,
 * - XML character references (`&amp;`, `&#38;`, `&#x26;`, ...) are decoded,
 * - CDATA-wrapped values are unwrapped and taken literally,
 * - entries that are empty after trimming are skipped.
 *
 * @param xml Raw sitemap or sitemap-index document.
 * @returns The URLs in document order.
 */
function parseUrlsFromSitemap(xml: string): string[] {
  const namedEntities: Record<string, string> = {
    amp: '&',
    lt: '<',
    gt: '>',
    quot: '"',
    apos: "'",
  };

  // Single-pass replacement so that "&amp;lt;" decodes to "&lt;" and not "<".
  const decodeEntities = (text: string): string =>
    text.replace(/&(#x[0-9a-fA-F]+|#\d+|amp|lt|gt|quot|apos);/g, (whole: string, ref: string) => {
      if (!ref.startsWith('#')) {
        return namedEntities[ref] ?? whole;
      }
      const code = ref.startsWith('#x') ? parseInt(ref.slice(2), 16) : parseInt(ref.slice(1), 10);
      // Leave out-of-range references untouched instead of throwing.
      return code <= 0x10ffff ? String.fromCodePoint(code) : whole;
    });

  const urls: string[] = [];
  // Group 1: CDATA payload; group 2: plain text content.
  const locRegex = /<loc>\s*(?:<!\[CDATA\[([\s\S]*?)\]\]>|([^<]+?))\s*<\/loc>/g;

  for (const match of xml.matchAll(locRegex)) {
    const cdata: string | undefined = match[1];
    // CDATA content is literal text, so entity decoding applies only to plain content.
    const url = cdata !== undefined ? cdata.trim() : decodeEntities((match[2] ?? '').trim());
    if (url !== '') {
      urls.push(url);
    }
  }

  return urls;
}

/**
 * Decide whether a sitemap document is an index of other sitemaps.
 *
 * True when the text contains a `<sitemapindex` opening tag, or when it
 * contains `<sitemap>` entries and no `<url>` entries at all.
 */
function isSitemapIndex(xml: string): boolean {
  if (xml.indexOf('<sitemapindex') !== -1) {
    return true;
  }

  const hasSitemapEntry = xml.indexOf('<sitemap>') !== -1;
  const hasUrlEntry = xml.indexOf('<url>') !== -1;
  return hasSitemapEntry && !hasUrlEntry;
}

/**
 * Check if a URL looks like a sitemap, i.e. its path ends in `.xml`.
 *
 * The query string and fragment are ignored, so `/sitemap.xml?page=2` counts
 * as a sitemap while `/page?file=a.xml` does not.
 *
 * @param url Absolute or relative URL taken from a sitemap entry.
 */
function isSitemapUrl(url: string): boolean {
  const cut = url.search(/[?#]/);
  const pathPart = cut === -1 ? url : url.slice(0, cut);
  return pathPart.endsWith('.xml');
}

/**
 * Convert an absolute URL to a host-relative request target for tests.
 *
 * Returns the path together with the query string, so an entry such as
 * `https://host/x?id=1` is requested as `/x?id=1` rather than `/x`. The
 * fragment is dropped because it is never sent to the server.
 *
 * @param url URL taken from a sitemap entry.
 * @returns The path plus query, or `url` unchanged when it cannot be parsed
 *          as an absolute URL (for example when it is already relative).
 */
function toRelativePath(url: string): string {
  try {
    const { pathname, search } = new URL(url);
    return `${pathname}${search}`;
  } catch {
    return url;
  }
}

/**
 * Download a sitemap document using `fetch` from inside the browser page.
 *
 * The XML is retrieved as plain text rather than navigated to, which keeps
 * Playwright's page.goto rendering pipeline (and its timeouts on very large
 * documents, 50K+ URLs) out of the picture.
 *
 * @param page Page whose browser context performs the request.
 * @param baseUrl Origin prepended to `sitemapPath`.
 * @param sitemapPath Path of the sitemap, appended to `baseUrl` as-is.
 * @returns The HTTP status code and the response body as text.
 */
async function fetchSitemapXml(page: Page, baseUrl: string, sitemapPath: string): Promise<{ status: number; xml: string }> {
  const target = { base: baseUrl, path: sitemapPath };

  // The callback runs in the page, so its inputs are handed over as the
  // evaluate argument instead of being captured from this scope.
  return page.evaluate(async ({ base, path }) => {
    const response = await fetch(base + path);
    const body = await response.text();
    return { status: response.status, xml: body };
  }, target);
}

// Deepest sitemap nesting that will be followed. Without a bound, a sitemap
// that (directly or indirectly) lists itself would recurse until the test
// times out.
const MAX_SITEMAP_DEPTH = 5;

/**
 * Navigate to a single page URL and describe what went wrong, if anything.
 *
 * Navigation exceptions (timeout, network error) are converted into an error
 * message so that the caller can keep checking the remaining sampled URLs.
 * A missing visible h1 is only logged as a warning, never returned as an error.
 *
 * @param page Page used for navigation.
 * @param path Request target passed to page.goto.
 * @param indent Prefix for log lines, reflecting the recursion depth.
 * @returns An error message, or `null` when the page responded with 200.
 */
async function checkSitemapPage(page: Page, path: string, indent: string): Promise<string | null> {
  let status: number | undefined;
  try {
    const pageResponse = await page.goto(path, { timeout: PAGE_TIMEOUT });
    status = pageResponse?.status();
  } catch (e: unknown) {
    const reason = e instanceof Error ? e.message : String(e);
    return `Navigation to ${path} failed: ${reason}`;
  }

  if (status !== 200) {
    return `Expected 200 for ${path}, got ${status}`;
  }

  // Verify page has content. `.first()` keeps pages with several h1 elements
  // from raising a strict-mode violation that the catch would turn into a
  // misleading "no visible h1" warning.
  const h1Visible = await page.locator('h1').first().isVisible().catch(() => false);
  if (!h1Visible) {
    // Don't treat missing h1 as hard failure, just log it
    console.log(`${indent}  WARNING: Page ${path} loaded but has no visible h1`);
  }
  return null;
}

/**
 * Recursively sample and test URLs from a sitemap.
 * If the sitemap is an index, sample child sitemaps and recurse.
 * If the sitemap contains page URLs, sample and verify them; entries that
 * are themselves sitemaps are followed recursively.
 *
 * Uses fetch API for sitemap XML retrieval (fast, no rendering),
 * and page.goto only for testing individual page URLs.
 *
 * A sitemap answering 404 is skipped; any other non-200 status fails the
 * surrounding test through `expect`.
 *
 * @param page Page used both for fetching sitemaps and visiting pages.
 * @param baseUrl Origin that sitemap paths are resolved against.
 * @param sitemapPath Path of the sitemap to process.
 * @param depth Current recursion depth (0 for the top-level call); also
 *              controls log indentation.
 * @returns The number of page URLs visited and the collected error messages.
 * @throws Error when nesting exceeds MAX_SITEMAP_DEPTH.
 */
async function testSitemapUrls(
  page: Page,
  baseUrl: string,
  sitemapPath: string,
  depth: number = 0
): Promise<{ tested: number; errors: string[] }> {
  if (depth > MAX_SITEMAP_DEPTH) {
    throw new Error(
      `Sitemap nesting exceeds ${MAX_SITEMAP_DEPTH} levels at ${sitemapPath} (possible self-referencing sitemap)`
    );
  }

  const indent = '  '.repeat(depth);
  const result = { tested: 0, errors: [] as string[] };

  const { status, xml } = await fetchSitemapXml(page, baseUrl, sitemapPath);

  if (status === 404) {
    console.log(`${indent}Sitemap not found: ${sitemapPath} (skipping)`);
    return result;
  }

  expect(status, `Sitemap ${sitemapPath} should load`).toBe(200);

  const urls = parseUrlsFromSitemap(xml);

  if (urls.length === 0) {
    console.log(`${indent}No URLs found in ${sitemapPath}`);
    return result;
  }

  console.log(`${indent}Found ${urls.length} URLs in ${sitemapPath}`);

  if (isSitemapIndex(xml)) {
    // This is a sitemap index - sample child sitemaps and recurse
    console.log(`${indent}Detected sitemap index, sampling ${CHILD_SITEMAP_SAMPLE_SIZE} child sitemaps`);

    const childSitemaps = getRandomItems(urls, CHILD_SITEMAP_SAMPLE_SIZE);
    for (const childUrl of childSitemaps) {
      const childPath = toRelativePath(childUrl);
      console.log(`${indent}Processing child sitemap: ${childPath}`);

      const childResult = await testSitemapUrls(page, baseUrl, childPath, depth + 1);
      result.tested += childResult.tested;
      result.errors.push(...childResult.errors);
    }
  } else {
    // This is a regular sitemap with page URLs - sample and test
    const sampleUrls = getRandomItems(urls, SAMPLE_SIZE);

    for (const url of sampleUrls) {
      const path = toRelativePath(url);

      // Check if this URL is itself a sitemap (nested sitemap within urlset)
      if (isSitemapUrl(url)) {
        console.log(`${indent}Found nested sitemap URL: ${path}`);
        const nestedResult = await testSitemapUrls(page, baseUrl, path, depth + 1);
        result.tested += nestedResult.tested;
        result.errors.push(...nestedResult.errors);
        continue;
      }

      // Regular page URL - verify it loads. Failed navigations count as
      // tested so that they are reported rather than skipped.
      console.log(`${indent}Testing page: ${path}`);
      const error = await checkSitemapPage(page, path, indent);
      result.tested++;

      if (error !== null) {
        console.log(`${indent}  ERROR: ${error}`);
        result.errors.push(error);
      }
    }
  }

  console.log(`${indent}Tested ${result.tested} URLs from ${sitemapPath}`);
  return result;
}

test.describe('sitemap URL validation', () => {

  // The root sitemap index is small, so plain navigation is fine here.
  test('sitemap index loads and contains child sitemaps', async ({ page }) => {
    const response = await page.goto('/sitemap.xml');
    expect(response?.status()).toBe(200);

    const content = await page.content();
    expect(content).toContain('sitemapindex');
    expect(content).toContain('sitemap-candidates.xml');
  });

  // One entry per entity sitemap. `timeout` (ms) overrides the test timeout
  // and is set only for the sitemaps that previously had an explicit override.
  const entitySitemaps: ReadonlyArray<{ name: string; path: string; timeout?: number }> = [
    { name: 'candidates', path: '/candidates/sitemap-candidates.xml', timeout: 120000 },
    { name: 'committees', path: '/committees/sitemap-committees.xml', timeout: 120000 },
    { name: 'parties', path: '/parties/sitemap-parties.xml' },
    { name: 'offices', path: '/offices/sitemap-offices.xml' },
    { name: 'districts', path: '/districts/sitemap-districts.xml' },
    { name: 'states', path: '/states/sitemap-states.xml' },
    { name: 'elections', path: '/elections/sitemap-elections.xml' },
  ];

  for (const { name, path, timeout } of entitySitemaps) {
    test(`${name} sitemap - sample URLs resolve`, async ({ page }) => {
      if (timeout !== undefined) {
        test.setTimeout(timeout);
      }

      // Navigate to a simple page first so we have a page context for fetch
      await page.goto('/');
      const baseUrl = new URL(page.url()).origin;

      const result = await testSitemapUrls(page, baseUrl, path);

      // Nothing was visited (e.g. the sitemap is absent or empty): skip
      // rather than report a pass that verified nothing.
      if (result.tested === 0) {
        test.skip();
        return;
      }

      console.log(`Tested ${result.tested} URLs from ${name} sitemap`);
      expect(result.errors, `Errors: ${result.errors.join(', ')}`).toHaveLength(0);
    });
  }

});
