import { test, expect, Page } from '@playwright/test';

/**
 * Sitemap-based URL validation tests
 *
 * These tests fetch URLs directly from the sitemaps and verify they resolve.
 * This catches issues where:
 * - URLs in sitemaps point to non-existent pages
 * - nginx routing doesn't serve mounted content correctly
 * - Static page generation produced broken links
 *
 * Handles nested sitemaps (sitemap indexes containing child sitemaps).
 */

// Maximum number of page URLs picked at random from a single (non-index)
// sitemap and requested during a test run. Sitemaps with fewer URLs are
// tested in full.
const SAMPLE_SIZE = 5;

// Maximum number of child sitemaps picked at random from a sitemap index
// before recursing into each of them.
const CHILD_SITEMAP_SAMPLE_SIZE = 2;

/**
 * Pick up to `count` items from `array` uniformly at random, without
 * replacement.
 *
 * Items are drawn one at a time from a private copy of the input, so the
 * caller's array is never mutated and every item has the same chance of being
 * selected. (Shuffling via `sort` with a random comparator is biased and its
 * result depends on the engine's sort algorithm.)
 *
 * @param array - Source items; left untouched.
 * @param count - Maximum number of items to return. Fractional values are
 *   rounded down; negative values are treated as zero.
 * @returns A new array holding `min(count, array.length)` items taken from
 *   distinct positions of `array`, in random order.
 */
function getRandomItems<T>(array: T[], count: number): T[] {
  const pool = [...array];
  // Clamp so a negative count yields [] rather than reaching slice(0, -k).
  const wanted = Math.max(0, Math.min(Math.floor(count), pool.length));
  const picked: T[] = [];

  while (picked.length < wanted) {
    const index = Math.floor(Math.random() * pool.length);
    // splice removes the drawn item so it cannot be selected twice.
    picked.push(...pool.splice(index, 1));
  }

  return picked;
}

// The five entities predefined by XML, keyed by name.
const XML_NAMED_ENTITIES: Readonly<Record<string, string>> = {
  amp: '&',
  lt: '<',
  gt: '>',
  quot: '"',
  apos: "'",
};

// Decode predefined and numeric XML character references in a single pass
// (so "&amp;lt;" becomes "&lt;", not "<"). Unknown references are left as-is.
function decodeXmlEntities(text: string): string {
  return text.replace(
    /&(#x[0-9a-fA-F]+|#[0-9]+|amp|lt|gt|quot|apos);/g,
    (whole: string, body: string): string => {
      if (!body.startsWith('#')) {
        return XML_NAMED_ENTITIES[body] ?? whole;
      }
      const codePoint =
        body[1] === 'x' ? parseInt(body.slice(2), 16) : parseInt(body.slice(1), 10);
      // String.fromCodePoint throws above the Unicode range; keep such text verbatim.
      return codePoint <= 0x10ffff ? String.fromCodePoint(codePoint) : whole;
    }
  );
}

/**
 * Extract every `<loc>` value from sitemap XML.
 *
 * Works for both `<url><loc>` entries (regular sitemaps) and `<sitemap><loc>`
 * entries (sitemap indexes). Values are returned in document order with
 * surrounding whitespace removed. Entity-escaped characters such as `&amp;`
 * are decoded, and CDATA-wrapped values are returned verbatim. Empty or
 * whitespace-only `<loc>` elements are skipped.
 *
 * @param xml - Sitemap document text.
 * @returns The URLs found, possibly empty.
 */
function parseUrlsFromSitemap(xml: string): string[] {
  // Group 1: CDATA payload. Group 2: plain text content.
  const locPattern = /<loc>(?:\s*<!\[CDATA\[([\s\S]*?)\]\]>\s*|([^<]*))<\/loc>/g;
  const urls: string[] = [];

  for (const match of xml.matchAll(locPattern)) {
    const [, cdata, text] = match;
    // CDATA content is literal, so only plain text goes through entity decoding.
    const url = cdata !== undefined ? cdata.trim() : decodeXmlEntities(text ?? '').trim();
    if (url.length > 0) {
      urls.push(url);
    }
  }

  return urls;
}

// Decide whether the XML is a sitemap index: either it has a <sitemapindex
// opening tag, or it has a bare <sitemap> tag and no <url> tag at all.
function isSitemapIndex(xml: string): boolean {
  if (xml.includes('<sitemapindex')) {
    return true;
  }

  const hasSitemapTag = xml.includes('<sitemap>');
  const hasUrlTag = xml.includes('<url>');
  return hasSitemapTag && !hasUrlTag;
}

// Treat a URL as a sitemap when its final characters are exactly ".xml"
// (case-sensitive; anything after the extension, such as a query, disqualifies it).
function isSitemapUrl(url: string): boolean {
  const extension = '.xml';
  const tail = url.slice(-extension.length);
  return tail === extension;
}

/**
 * Convert an absolute URL into a host-relative request target.
 *
 * The scheme, host and fragment are dropped; the path and the query string
 * are kept, so a sitemap entry like `https://host/x?id=1` is requested as
 * `/x?id=1` rather than as the different resource `/x`.
 *
 * @param url - URL taken from a sitemap.
 * @returns `pathname + search` of the URL, or `url` unchanged when it cannot
 *   be parsed as an absolute URL (for example when it is already relative).
 */
function toRelativePath(url: string): string {
  let parsed: URL;
  try {
    parsed = new URL(url);
  } catch {
    // Not an absolute URL; hand it back untouched.
    return url;
  }
  return `${parsed.pathname}${parsed.search}`;
}

/**
 * Recursively sample and test URLs from a sitemap.
 *
 * - A sitemap that answers 404 is logged and skipped (empty result).
 * - Any other non-200 status for the sitemap itself fails via `expect`.
 * - If the sitemap is an index, up to CHILD_SITEMAP_SAMPLE_SIZE child
 *   sitemaps are sampled and processed recursively.
 * - Otherwise up to SAMPLE_SIZE URLs are sampled. A sampled URL ending in
 *   `.xml` is treated as a nested sitemap and recursed into; any other URL is
 *   requested and must answer 200.
 *
 * A page that fails to navigate (timeout, network error, ...) or answers with
 * a non-200 status is recorded in `errors` and sampling continues, so one bad
 * URL does not hide the others. A missing visible `<h1>` is only logged.
 *
 * @param page - Playwright page used for every navigation.
 * @param sitemapPath - Path (or URL) of the sitemap to load.
 * @param depth - Current recursion depth; drives log indentation and the
 *   nesting guard. Callers normally omit it.
 * @returns How many page URLs were requested and the error messages collected.
 * @throws Error when nesting exceeds the depth guard, which indicates sitemaps
 *   referencing each other in a cycle.
 */
async function testSitemapUrls(
  page: Page,
  sitemapPath: string,
  depth: number = 0
): Promise<{ tested: number; errors: string[] }> {
  // Far deeper than an index -> sitemap -> page chain ever needs. Without a
  // bound, a sitemap that lists itself would recurse until the test times out.
  const maxDepth = 5;
  if (depth > maxDepth) {
    throw new Error(
      `Sitemap nesting exceeded ${maxDepth} levels at ${sitemapPath}; sitemaps may reference each other in a cycle`
    );
  }

  const indent = '  '.repeat(depth);
  const result = { tested: 0, errors: [] as string[] };

  // Fold the outcome of a recursive call into this level's result.
  const absorb = (child: { tested: number; errors: string[] }): void => {
    result.tested += child.tested;
    result.errors.push(...child.errors);
  };

  const response = await page.goto(sitemapPath);

  if (response?.status() === 404) {
    console.log(`${indent}Sitemap not found: ${sitemapPath} (skipping)`);
    return result;
  }

  expect(response?.status(), `Sitemap ${sitemapPath} should load`).toBe(200);

  // NOTE(review): page.content() is the browser's serialized DOM, not the raw
  // response body; if another browser renders XML differently, consider
  // reading the response text instead - confirm before changing.
  const xml = await page.content();
  const urls = parseUrlsFromSitemap(xml);

  if (urls.length === 0) {
    console.log(`${indent}No URLs found in ${sitemapPath}`);
    return result;
  }

  console.log(`${indent}Found ${urls.length} URLs in ${sitemapPath}`);

  if (isSitemapIndex(xml)) {
    // This is a sitemap index - sample child sitemaps and recurse
    console.log(`${indent}Detected sitemap index, sampling ${CHILD_SITEMAP_SAMPLE_SIZE} child sitemaps`);

    for (const childUrl of getRandomItems(urls, CHILD_SITEMAP_SAMPLE_SIZE)) {
      const childPath = toRelativePath(childUrl);
      console.log(`${indent}Processing child sitemap: ${childPath}`);
      absorb(await testSitemapUrls(page, childPath, depth + 1));
    }

    return result;
  }

  // This is a regular sitemap with page URLs - sample and test
  for (const url of getRandomItems(urls, SAMPLE_SIZE)) {
    const path = toRelativePath(url);

    // Check if this URL is itself a sitemap (nested sitemap within urlset)
    if (isSitemapUrl(url)) {
      console.log(`${indent}Found nested sitemap URL: ${path}`);
      absorb(await testSitemapUrls(page, path, depth + 1));
      continue;
    }

    // Regular page URL - verify it loads
    console.log(`${indent}Testing page: ${path}`);
    result.tested++;

    let status: number | undefined;
    try {
      status = (await page.goto(path))?.status();
    } catch (e: unknown) {
      // Record the failure for this URL and keep checking the remaining samples.
      const reason = e instanceof Error ? e.message : String(e);
      const error = `Navigation to ${path} failed: ${reason}`;
      console.log(`${indent}  ERROR: ${error}`);
      result.errors.push(error);
      continue;
    }

    if (status !== 200) {
      const error = `Expected 200 for ${path}, got ${status}`;
      console.log(`${indent}  ERROR: ${error}`);
      result.errors.push(error);
      continue;
    }

    // Verify page has content. `.first()` keeps the check from throwing a
    // strict-mode violation (and being misreported as "no h1") when the page
    // has more than one <h1>.
    const h1Visible = await page
      .locator('h1')
      .first()
      .isVisible()
      .catch(() => false);
    if (!h1Visible) {
      // Don't treat missing h1 as hard failure, just log it
      console.log(`${indent}  WARNING: Page ${path} loaded but has no visible h1`);
    }
  }

  return result;
}

test.describe('sitemap URL validation', () => {
  // The root sitemap must be an index that references at least the candidates sitemap.
  test('sitemap index loads and contains child sitemaps', async ({ page }) => {
    const response = await page.goto('/sitemap.xml');
    expect(response?.status()).toBe(200);

    const content = await page.content();
    expect(content).toContain('sitemapindex');
    expect(content).toContain('sitemap-candidates.xml');
  });

  // Each section publishes its sitemap at /<section>/sitemap-<section>.xml.
  // One test per section keeps failures attributable in the report.
  const sections = [
    'candidates',
    'committees',
    'parties',
    'offices',
    'districts',
    'states',
    'elections',
  ] as const;

  for (const section of sections) {
    test(`${section} sitemap - sample URLs resolve`, async ({ page }) => {
      const result = await testSitemapUrls(page, `/${section}/sitemap-${section}.xml`);

      // Nothing was sampled (sitemap missing or empty): report as skipped,
      // with a reason, rather than as a vacuous pass.
      test.skip(result.tested === 0, `No URLs were sampled from the ${section} sitemap`);

      console.log(`Tested ${result.tested} URLs from ${section} sitemap`);
      expect(result.errors, `Errors: ${result.errors.join(', ')}`).toHaveLength(0);
    });
  }
});
