<?php
/**
 * ============================================
 * FLOWBOT DCI v7.0 - ONE-CLICK INSTALLER
 * ============================================
 * Upload this file to your server and access it via browser.
 * It will automatically install all v7.0 files.
 * ============================================
 */

// Installer runtime: allow up to five minutes and surface every error in the browser.
set_time_limit(300);
error_reporting(E_ALL);
ini_set('display_errors', 1);

// Install root: this file's directory, or its parent when the file sits in "deploy/".
$basePath = basename(__DIR__) === 'deploy' ? dirname(__DIR__) : __DIR__;

// Collectors for status and error messages.
$errors = [];
$results = [];

// Database configuration - UPDATE THESE VALUES
// NOTE(review): live-looking credentials are committed in this file; consider
// removing the installer from the server once it has run.
$dbConfig = [
    'host' => 'localhost',
    'port' => 3306,
    'database' => 'digupdog_FEED',
    'username' => 'digupdog_FEEDadmin',
    'password' => 'Raimundinho1',
];

/**
 * Make sure a directory exists, creating it (with any missing parents) in mode 0755.
 *
 * @param string $path Directory to create.
 * @return string Status line; a failed creation is prefixed with "ERROR:".
 */
function createDir($path) {
    // Nothing to do when the directory is already there.
    if (is_dir($path)) {
        return "Directory exists: $path";
    }

    return mkdir($path, 0755, true)
        ? "Created directory: $path"
        : "ERROR: Failed to create directory: $path";
}

/**
 * Write $content to $path, creating the parent directory tree (mode 0755) when missing.
 *
 * @param string $path    Destination file; an existing file is overwritten.
 * @param string $content Bytes to write.
 * @return string Status line; a failed write is prefixed with "ERROR:".
 */
function writeFile($path, $content) {
    $parent = dirname($path);
    // The mkdir result is not inspected: a failure surfaces through the write below.
    is_dir($parent) || mkdir($parent, 0755, true);

    $written = file_put_contents($path, $content);

    return $written === false
        ? "ERROR: Failed to create file: $path"
        : "Created file: $path";
}

// ============================================
// START INSTALLATION
// ============================================

// Page shell: document head, inline stylesheet and the opening container.
echo "<!DOCTYPE html><html><head><meta charset='UTF-8'><title>Flowb0t DCI v7.0 Installer</title>",
    "<style>
body { font-family: 'Segoe UI', sans-serif; background: #1a1a2e; color: #f8fafc; padding: 40px; }
.container { max-width: 900px; margin: 0 auto; }
h1 { color: #6366f1; }
.success { color: #10b981; }
.error { color: #ef4444; }
.info { color: #3b82f6; }
pre { background: #0f0f1a; padding: 15px; border-radius: 8px; overflow-x: auto; font-size: 12px; }
.btn { display: inline-block; padding: 12px 24px; background: linear-gradient(135deg, #6366f1, #8b5cf6); color: white; text-decoration: none; border-radius: 8px; margin: 10px 5px; }
.btn:hover { opacity: 0.9; }
</style></head><body><div class='container'>";

// Banner.
echo "<h1>🚀 Flowb0t DCI v7.0 Installer</h1>",
    "<p>Installing Unified Professional Crawler System...</p><hr>";

// Step 1: create every target directory, logging one status line per directory.
echo "<h2>Step 1: Creating Directories</h2><pre>";
$dirs = [
    "$basePath/src/Services/Crawler",
    "$basePath/src/Api/v1",
    "$basePath/views",
    "$basePath/public/assets/css",
    "$basePath/public/assets/js",
    "$basePath/migrations",
];

// Each status is printed right after its directory is handled so that any
// warning raised during creation appears next to the line it belongs to.
foreach ($dirs as $dir) {
    echo createDir($dir), "\n";
}
echo "</pre>";

// Step 2: service class files (emitted by the blocks that follow).
echo "<h2>Step 2: Creating Service Files</h2><pre>";

// InfiniteMode.php
// Embedded source of FlowbotDCI\Services\Crawler\InfiniteMode. The nowdoc is
// written verbatim (no interpolation) to src/Services/Crawler/InfiniteMode.php
// and the resulting status line is echoed into the Step 2 log.
//
// What the embedded class does:
//  - keeps an in-memory FIFO queue of URLs, de-duplicated by the md5 of the
//    trimmed, lower-cased URL;
//  - start() is a generator that yields ['type' => 'url', 'url', 'depth'] per
//    queued URL and finally ['type' => 'stopped', 'total_processed'];
//  - configure() clamps checkpoint_interval to >= 10 and max_memory_mb to >= 128.
//
// NOTE(review): start() loops only while status === 'running', so pause() ends
// the generator with a 'stopped' item exactly like stop() - confirm intended.
// NOTE(review): with an empty queue start() sleeps in 1-second steps without
// yielding; presumably the consumer cannot enqueue URLs meanwhile - verify.
// NOTE(review): $queries, $pdo, $jobId and most $config keys are stored but
// never read by the code embedded here.
$infiniteModeContent = <<<'PHP'
<?php
declare(strict_types=1);
namespace FlowbotDCI\Services\Crawler;
use FlowbotDCI\Core\Database;
use PDO;
use Generator;

class InfiniteMode {
    const VERSION = '1.0';
    private array $config = ['checkpoint_interval' => 100, 'max_memory_mb' => 512, 'query_rotation_interval' => 500, 'depth_expansion_interval' => 200, 'gc_interval' => 50, 'pause_on_errors' => 10, 'restart_delay_seconds' => 60];
    private array $state = ['status' => 'idle', 'urls_processed' => 0, 'urls_discovered' => 0, 'current_depth' => 1, 'current_query_index' => 0, 'errors_count' => 0, 'last_checkpoint' => null, 'start_time' => null, 'pause_until' => null];
    private array $queries = [];
    private array $urlQueue = [];
    private array $seenUrls = [];
    private ?PDO $pdo = null;
    private ?string $jobId = null;

    public function __construct(array $config = []) { $this->config = array_merge($this->config, $config); }
    public function setDatabase(Database $database): self { $this->pdo = $database->getConnection(); return $this; }
    public function setJobId(string $jobId): self { $this->jobId = $jobId; return $this; }
    public function configure(array $options): self { if (isset($options['checkpoint_interval'])) $this->config['checkpoint_interval'] = max(10, (int)$options['checkpoint_interval']); if (isset($options['max_memory_mb'])) $this->config['max_memory_mb'] = max(128, (int)$options['max_memory_mb']); return $this; }
    public function setQueries(array $queries): self { $this->queries = array_filter($queries, fn($q) => !empty(trim($q))); return $this; }
    public function addSeedUrls(array $urls): self { foreach ($urls as $url) $this->addToQueue($url, 0); return $this; }
    public function start(): Generator { $this->state['status'] = 'running'; $this->state['start_time'] = microtime(true); while ($this->state['status'] === 'running') { $urlData = $this->getNextUrl(); if (!$urlData) { sleep(1); continue; } yield ['type' => 'url', 'url' => $urlData['url'], 'depth' => $urlData['depth']]; $this->state['urls_processed']++; } yield ['type' => 'stopped', 'total_processed' => $this->state['urls_processed']]; }
    public function addDiscoveredUrls(array $urls, int $depth = 0): void { foreach ($urls as $url) if ($this->addToQueue($url, $depth)) $this->state['urls_discovered']++; }
    public function stop(): void { $this->state['status'] = 'stopped'; }
    public function pause(): void { $this->state['status'] = 'paused'; }
    public function resume(): void { $this->state['status'] = 'running'; $this->state['pause_until'] = null; }
    public function getState(): array { return array_merge($this->state, ['queue_size' => count($this->urlQueue), 'memory_mb' => memory_get_usage(true) / 1024 / 1024]); }
    private function addToQueue(string $url, int $depth, string $source = 'discovered'): bool { $urlHash = md5(strtolower(trim($url))); if (isset($this->seenUrls[$urlHash])) return false; $this->seenUrls[$urlHash] = true; $this->urlQueue[] = ['url' => $url, 'depth' => $depth, 'source' => $source]; return true; }
    private function getNextUrl(): ?array { return empty($this->urlQueue) ? null : array_shift($this->urlQueue); }
}
PHP;
echo writeFile("$basePath/src/Services/Crawler/InfiniteMode.php", $infiniteModeContent) . "\n";

// RobotsHandler.php
// Embedded class source, written verbatim (nowdoc: no interpolation) to
// src/Services/Crawler/RobotsHandler.php; the status line goes to the Step 2 log.
$robotsHandlerContent = <<<'PHP'
<?php
declare(strict_types=1);
namespace FlowbotDCI\Services\Crawler;
use FlowbotDCI\Core\Database;
use PDO;

/**
 * Fetches, parses and evaluates robots.txt rules for the crawler.
 *
 * Parsed rules are kept in an in-memory, per-host cache for the lifetime of
 * the instance. NOTE(review): $cacheTtl, $database and $pdo are stored but are
 * not consulted by this class yet.
 */
class RobotsHandler {
    const VERSION = '1.0';
    private ?Database $database = null;
    private ?PDO $pdo = null;
    private string $userAgent = 'FlowbotDCI';
    private int $cacheTtl = 86400;
    private array $cache = [];

    public function setDatabase(Database $database): self { $this->database = $database; $this->pdo = $database->getConnection(); return $this; }
    public function setUserAgent(string $userAgent): self { $this->userAgent = $userAgent; return $this; }
    public function setCacheTtl(int $seconds): self { $this->cacheTtl = max(0, $seconds); return $this; }

    /**
     * Tell whether the robots.txt of the URL's host permits fetching the URL.
     * URLs without a parsable host are treated as allowed.
     */
    public function isAllowed(string $url): bool {
        $parts = parse_url($url);
        if (!$parts || !isset($parts['host'])) return true;
        // Rules are matched against the path including the query string.
        $path = $parts['path'] ?? '/';
        if (isset($parts['query'])) $path .= '?' . $parts['query'];
        return $this->checkRules($this->rulesFor($parts), $path);
    }

    /** Crawl-delay (whole seconds) declared for our agent, 0 when none or the URL has no host. */
    public function getCrawlDelay(string $url): int {
        $parts = parse_url($url);
        if (!$parts || !isset($parts['host'])) return 0;
        return $this->rulesFor($parts)['crawl_delay'] ?? 0;
    }

    /** Sitemap URLs listed in the host's robots.txt, empty when none or the URL has no host. */
    public function getSitemaps(string $url): array {
        $parts = parse_url($url);
        if (!$parts || !isset($parts['host'])) return [];
        return $this->rulesFor($parts)['sitemaps'] ?? [];
    }

    /** Resolve parse_url() output (host required) to the parsed rules of that host. */
    private function rulesFor(array $parts): array {
        $scheme = $parts['scheme'] ?? 'https';
        $host = $parts['host'];
        return $this->getRules("{$scheme}://{$host}/robots.txt", $host);
    }

    /** Return the rules for a host, fetching and parsing robots.txt on first use. */
    private function getRules(string $robotsUrl, string $host): array {
        if (isset($this->cache[$host])) return $this->cache[$host];
        $rules = $this->parse($this->fetchRobotsTxt($robotsUrl));
        $this->cache[$host] = $rules;
        return $rules;
    }

    /** Download robots.txt; any failure yields an empty string (treated as "no rules"). */
    private function fetchRobotsTxt(string $url): string {
        // NOTE(review): TLS peer verification is switched off here - confirm this is intended.
        $context = stream_context_create(['http' => ['timeout' => 5, 'user_agent' => $this->userAgent], 'ssl' => ['verify_peer' => false, 'verify_peer_name' => false]]);
        $content = @file_get_contents($url, false, $context);
        return $content !== false ? $content : '';
    }

    /**
     * Parse robots.txt text into the rules that apply to our user agent.
     *
     * @return array{allow: list<string>, disallow: list<string>, crawl_delay: int, sitemaps: list<string>, allow_lengths: list<int>, disallow_lengths: list<int>}
     *         "allow"/"disallow" hold regexes; the parallel "*_lengths" lists hold the
     *         byte length of each original pattern and drive longest-match precedence.
     */
    public function parse(string $content): array {
        $rules = ['allow' => [], 'disallow' => [], 'crawl_delay' => 0, 'sitemaps' => [], 'allow_lengths' => [], 'disallow_lengths' => []];
        if ($content === '') return $rules;
        $matchesOurAgent = false;
        // True while reading consecutive User-agent lines: they form ONE group,
        // so a later line must not cancel an earlier match.
        $inAgentHeader = false;
        foreach (explode("\n", $content) as $line) {
            // Drop trailing "# comment" text before interpreting the line.
            $hash = strpos($line, '#');
            if ($hash !== false) $line = substr($line, 0, $hash);
            $line = trim($line);
            if ($line === '' || strpos($line, ':') === false) continue;
            [$directive, $value] = array_map('trim', explode(':', $line, 2));
            $directive = strtolower($directive);
            switch ($directive) {
                case 'user-agent':
                    $isOurs = ($value === '*' || stripos($value, $this->userAgent) !== false);
                    $matchesOurAgent = $inAgentHeader ? ($matchesOurAgent || $isOurs) : $isOurs;
                    $inAgentHeader = true;
                    break;
                case 'allow':
                case 'disallow':
                    $inAgentHeader = false;
                    if ($matchesOurAgent && $value !== '') {
                        $rules[$directive][] = $this->patternToRegex($value);
                        $rules[$directive . '_lengths'][] = strlen($value);
                    }
                    break;
                case 'crawl-delay':
                    $inAgentHeader = false;
                    if ($matchesOurAgent) $rules['crawl_delay'] = (int)$value;
                    break;
                case 'sitemap':
                    // Sitemap lines are global: they apply regardless of the agent group.
                    if ($value !== '' && filter_var($value, FILTER_VALIDATE_URL)) $rules['sitemaps'][] = $value;
                    break;
            }
        }
        return $rules;
    }

    /** Turn a robots.txt path pattern ("*" wildcard, optional trailing "$") into an anchored regex. */
    private function patternToRegex(string $pattern): string {
        $pattern = preg_quote($pattern, '/');
        $pattern = str_replace('\\*', '.*', $pattern);
        if (substr($pattern, -2) === '\\$') $pattern = substr($pattern, 0, -2) . '$';
        return '/^' . $pattern . '/i';
    }

    /**
     * Apply longest-match precedence: the most specific matching rule decides,
     * and Allow wins when Allow and Disallow are equally specific.
     */
    private function checkRules(array $rules, string $path): bool {
        $allowed = $this->longestMatch($rules['allow'] ?? [], $rules['allow_lengths'] ?? [], $path);
        $denied = $this->longestMatch($rules['disallow'] ?? [], $rules['disallow_lengths'] ?? [], $path);
        return $denied < 0 || $allowed >= $denied;
    }

    /** Length of the longest pattern that matches $path, or -1 when none matches. */
    private function longestMatch(array $patterns, array $lengths, string $path): int {
        $best = -1;
        foreach ($patterns as $index => $pattern) {
            if (preg_match($pattern, $path)) $best = max($best, $lengths[$index] ?? strlen($pattern));
        }
        return $best;
    }

    public function clearCache(string $host): void { unset($this->cache[$host]); }
    public function clearAllCache(): void { $this->cache = []; }
}
PHP;
echo writeFile("$basePath/src/Services/Crawler/RobotsHandler.php", $robotsHandlerContent) . "\n";

// SitemapParser.php
// Embedded class source, written verbatim (nowdoc: no interpolation) to
// src/Services/Crawler/SitemapParser.php; the status line goes to the Step 2 log.
$sitemapParserContent = <<<'PHP'
<?php
declare(strict_types=1);
namespace FlowbotDCI\Services\Crawler;
use Generator;

/**
 * Streams URLs out of XML sitemaps and sitemap indexes (plain or gzip-compressed).
 */
class SitemapParser {
    const VERSION = '1.0';
    /** Maximum nesting of sitemap indexes followed; stops self-referencing indexes from recursing forever. */
    private const MAX_INDEX_DEPTH = 5;
    private string $userAgent = 'FlowbotDCI/1.0';
    private int $timeout = 30;
    private int $maxUrls = 50000;

    public function setUserAgent(string $userAgent): self { $this->userAgent = $userAgent; return $this; }
    public function setTimeout(int $seconds): self { $this->timeout = max(5, $seconds); return $this; }
    public function setMaxUrls(int $max): self { $this->maxUrls = max(100, $max); return $this; }

    /**
     * Yield every page URL of a sitemap, following nested sitemap indexes.
     *
     * @param string $sitemapUrl Sitemap or sitemap-index URL.
     * @param int    $depth      Current index nesting level (callers leave the default).
     * @return Generator<int, string> At most $maxUrls URLs.
     */
    public function parse(string $sitemapUrl, int $depth = 0): Generator {
        $xml = $this->load($sitemapUrl);
        if ($xml === null) return;
        $count = 0;
        if ($depth < self::MAX_INDEX_DEPTH && isset($xml->sitemap)) {
            foreach ($xml->sitemap as $sitemap) {
                // <loc> text frequently carries surrounding whitespace or newlines.
                $loc = trim((string)$sitemap->loc);
                if ($loc === '') continue;
                foreach ($this->parse($loc, $depth + 1) as $url) { if (++$count > $this->maxUrls) return; yield $url; }
            }
        }
        if (isset($xml->url)) {
            foreach ($xml->url as $urlNode) {
                $url = trim((string)$urlNode->loc);
                if ($url === '') continue;
                if (++$count > $this->maxUrls) return;
                yield $url;
            }
        }
    }

    /**
     * Same traversal as parse(), yielding url/lastmod/changefreq/priority per entry.
     *
     * @param string $sitemapUrl Sitemap or sitemap-index URL.
     * @param int    $depth      Current index nesting level (callers leave the default).
     * @return Generator<int, array{url: string, lastmod: ?string, changefreq: ?string, priority: ?float}>
     */
    public function parseWithMetadata(string $sitemapUrl, int $depth = 0): Generator {
        $xml = $this->load($sitemapUrl);
        if ($xml === null) return;
        $count = 0;
        if ($depth < self::MAX_INDEX_DEPTH && isset($xml->sitemap)) {
            foreach ($xml->sitemap as $sitemap) {
                $loc = trim((string)$sitemap->loc);
                if ($loc === '') continue;
                foreach ($this->parseWithMetadata($loc, $depth + 1) as $data) { if (++$count > $this->maxUrls) return; yield $data; }
            }
        }
        if (isset($xml->url)) {
            foreach ($xml->url as $urlNode) {
                $url = trim((string)$urlNode->loc);
                if ($url === '') continue;
                if (++$count > $this->maxUrls) return;
                yield ['url' => $url, 'lastmod' => isset($urlNode->lastmod) ? (string)$urlNode->lastmod : null, 'changefreq' => isset($urlNode->changefreq) ? (string)$urlNode->changefreq : null, 'priority' => isset($urlNode->priority) ? (float)$urlNode->priority : null];
            }
        }
    }

    /**
     * Find sitemap URLs for a site: "Sitemap:" lines of robots.txt first, then
     * the conventional file names that answer with a 200 status line.
     */
    public function discoverSitemaps(string $baseUrl): array {
        $sitemaps = [];
        $parts = parse_url($baseUrl);
        if (!$parts || !isset($parts['host'])) return $sitemaps;
        $scheme = $parts['scheme'] ?? 'https';
        $host = $parts['host'];
        $robotsUrl = "{$scheme}://{$host}/robots.txt";
        $robotsContent = $this->fetch($robotsUrl);
        if (!empty($robotsContent)) {
            preg_match_all('/^Sitemap:\s*(.+)$/mi', $robotsContent, $matches);
            if (!empty($matches[1])) foreach ($matches[1] as $url) if (filter_var(trim($url), FILTER_VALIDATE_URL)) $sitemaps[] = trim($url);
        }
        $defaults = ["{$scheme}://{$host}/sitemap.xml", "{$scheme}://{$host}/sitemap_index.xml", "{$scheme}://{$host}/sitemap-index.xml"];
        foreach ($defaults as $url) if ($this->exists($url) && !in_array($url, $sitemaps, true)) $sitemaps[] = $url;
        return $sitemaps;
    }

    /**
     * Fetch a sitemap and parse it as XML, transparently gunzipping it.
     * Returns null when the download, the decompression or the XML parse fails.
     */
    private function load(string $url): ?\SimpleXMLElement {
        $content = $this->fetch($url);
        if ($content === '') return null;
        if (substr($content, 0, 2) === "\x1f\x8b") {
            // gzdecode() returns false on corrupt data; never hand that to the XML parser.
            $decoded = @gzdecode($content);
            if ($decoded === false || $decoded === '') return null;
            $content = $decoded;
        }
        $xml = @simplexml_load_string($content);
        return $xml instanceof \SimpleXMLElement ? $xml : null;
    }

    /** Download a URL; any failure yields an empty string. */
    private function fetch(string $url): string {
        // NOTE(review): TLS peer verification is switched off here - confirm this is intended.
        $context = stream_context_create(['http' => ['timeout' => $this->timeout, 'user_agent' => $this->userAgent, 'follow_location' => true, 'max_redirects' => 3], 'ssl' => ['verify_peer' => false, 'verify_peer_name' => false]]);
        $content = @file_get_contents($url, false, $context);
        return $content !== false ? $content : '';
    }

    /** True when the first status line returned for the URL contains "200". */
    private function exists(string $url): bool {
        $headers = @get_headers($url);
        return $headers && strpos($headers[0], '200') !== false;
    }
}
PHP;
echo writeFile("$basePath/src/Services/Crawler/SitemapParser.php", $sitemapParserContent) . "\n";

// RelevanceScorer.php
// Embedded class source, written verbatim (nowdoc: no interpolation) to
// src/Services/Crawler/RelevanceScorer.php; the status line goes to the Step 2 log.
$relevanceScorerContent = <<<'PHP'
<?php
declare(strict_types=1);
namespace FlowbotDCI\Services\Crawler;

/**
 * Scores how relevant an HTML page is to a list of search terms by looking
 * for the terms in weighted zones (title, headings, meta description,
 * paragraphs) and in the URL.
 */
class RelevanceScorer {
    const VERSION = '1.0';
    private array $weights = ['title' => 10, 'h1' => 8, 'h2' => 5, 'meta_description' => 6, 'paragraph' => 3, 'url' => 4];
    private float $threshold = 2.0;
    private array $forcedDomains = [];
    // Entries ending in "." (e.g. "yandex.") match the name under any TLD.
    private array $searchEngines = ['google.com', 'bing.com', 'yahoo.com', 'duckduckgo.com', 'baidu.com', 'yandex.'];

    public function setWeights(array $weights): self { $this->weights = array_merge($this->weights, $weights); return $this; }
    public function setThreshold(float $threshold): self { $this->threshold = max(0.0, $threshold); return $this; }
    public function setForcedDomains(array $domains): self { $this->forcedDomains = array_map('strtolower', $domains); return $this; }

    /**
     * Compute the relevance score of a page.
     *
     * Returns 10.0 when no terms are given, 100.0 for forced domains and 0.0
     * for search-engine domains. Otherwise each term adds the zone weight for
     * every zone containing it, a partial bonus for multi-word terms whose
     * words appear individually, and the "url" weight when the term appears
     * in the URL (spaces removed or replaced by "-").
     */
    public function calculateScore(string $html, string $url, array $searchTerms): float {
        if (empty($searchTerms)) return 10.0;
        $domain = $this->hostOf($url);
        if ($this->isForcedDomain($domain)) return 100.0;
        if ($this->isSearchEngine($domain)) return 0.0;
        // Lower-case every zone once instead of once per term.
        $zones = array_map('strtolower', $this->extractZones($html));
        $totalScore = 0.0;
        foreach ($searchTerms as $term) {
            $term = strtolower(trim($term));
            if ($term === '') continue;
            // Repeated spaces must not produce empty words: an empty needle
            // "matches" every zone and would inflate the partial-match bonus.
            $words = [];
            foreach (explode(' ', $term) as $word) if ($word !== '') $words[] = $word;
            foreach ($zones as $zone => $content) {
                $weight = $this->weights[$zone] ?? 1;
                if (strpos($content, $term) !== false) $totalScore += $weight;
                if (count($words) > 1) {
                    $matchCount = 0;
                    foreach ($words as $word) if (strpos($content, $word) !== false) $matchCount++;
                    if ($matchCount > 0) $totalScore += ($weight * 0.3 * ($matchCount / count($words)));
                }
            }
            if (stripos($url, str_replace(' ', '', $term)) !== false || stripos($url, str_replace(' ', '-', $term)) !== false) $totalScore += $this->weights['url'];
        }
        return round($totalScore, 2);
    }

    /** True when the page is on a forced domain or scores at least the threshold; search engines never are. */
    public function isRelevant(string $html, string $url, array $searchTerms): bool {
        $domain = $this->hostOf($url);
        if ($this->isForcedDomain($domain)) return true;
        if ($this->isSearchEngine($domain)) return false;
        if (empty($searchTerms)) return true;
        $score = $this->calculateScore($html, $url, $searchTerms);
        return $score >= $this->threshold;
    }

    /** Score the page and return the score together with the inputs that produced the verdict. */
    public function analyze(string $html, string $url, array $searchTerms): array {
        $score = $this->calculateScore($html, $url, $searchTerms);
        $domain = $this->hostOf($url);
        return ['score' => $score, 'threshold' => $this->threshold, 'is_relevant' => $score >= $this->threshold || $this->isForcedDomain($domain), 'is_forced' => $this->isForcedDomain($domain), 'is_search_engine' => $this->isSearchEngine($domain), 'zones' => $this->extractZones($html)];
    }

    /** Lower-cased host of a URL, or '' when it has none (parse_url() may give null or false). */
    private function hostOf(string $url): string {
        $host = parse_url($url, PHP_URL_HOST);
        return is_string($host) ? strtolower($host) : '';
    }

    /** Pull the text of each scoring zone out of the HTML with regexes (first 5 h2, first 10 p). */
    private function extractZones(string $html): array {
        $zones = ['title' => '', 'h1' => '', 'h2' => '', 'meta_description' => '', 'paragraph' => ''];
        if (preg_match('/<title[^>]*>([^<]+)<\/title>/i', $html, $m)) $zones['title'] = strip_tags($m[1]);
        if (preg_match_all('/<h1[^>]*>([^<]+)<\/h1>/i', $html, $m)) $zones['h1'] = implode(' ', $m[1]);
        if (preg_match_all('/<h2[^>]*>([^<]+)<\/h2>/i', $html, $m)) $zones['h2'] = implode(' ', array_slice($m[1], 0, 5));
        if (preg_match('/<meta[^>]+name=["\']description["\'][^>]+content=["\']([^"\']+)["\'][^>]*>/i', $html, $m)) $zones['meta_description'] = $m[1];
        elseif (preg_match('/<meta[^>]+content=["\']([^"\']+)["\'][^>]+name=["\']description["\'][^>]*>/i', $html, $m)) $zones['meta_description'] = $m[1];
        if (preg_match_all('/<p[^>]*>([^<]+)<\/p>/i', $html, $m)) $zones['paragraph'] = implode(' ', array_slice($m[1], 0, 10));
        return array_map('trim', $zones);
    }

    /** True when the domain (ignoring a leading "www.") equals a forced domain or is a subdomain of one. */
    private function isForcedDomain(string $domain): bool {
        $domain = preg_replace('/^www\./', '', $domain);
        foreach ($this->forcedDomains as $forced) if ($domain === $forced || str_ends_with($domain, '.' . $forced)) return true;
        return false;
    }

    /**
     * True when a known search-engine name occurs in the domain on label
     * boundaries, so "news.google.com" and "google.com.br" match "google.com"
     * while "plumbing.com" does not match "bing.com".
     */
    private function isSearchEngine(string $domain): bool {
        if ($domain === '') return false;
        $haystack = '.' . strtolower($domain) . '.';
        foreach ($this->searchEngines as $engine) if (strpos($haystack, '.' . rtrim($engine, '.') . '.') !== false) return true;
        return false;
    }
}
PHP;
echo writeFile("$basePath/src/Services/Crawler/RelevanceScorer.php", $relevanceScorerContent) . "\n";

// DuplicateDetector.php
// Embedded source of FlowbotDCI\Services\Crawler\DuplicateDetector. The nowdoc
// is written verbatim (no interpolation) to
// src/Services/Crawler/DuplicateDetector.php and the status line is echoed
// into the Step 2 log.
//
// What the embedded class does:
//  - normalizeUrl(): strips the http(s) scheme and a leading "www.", removes
//    the listed tracking parameters, sorts the remaining query parameters,
//    trims trailing slashes and lower-cases the whole result;
//  - hashUrl()/hashContent(): md5 of the normalized URL / of the tag-stripped,
//    whitespace-collapsed, lower-cased content;
//  - isDuplicateUrl()/isDuplicateContent(): check the in-memory sets first,
//    then (when a PDO handle was set) the crawler_content_hashes table;
//  - recordUrl(): remembers the hashes in memory and upserts a row.
// Database exceptions are caught and ignored, so a failing query reads as
// "not a duplicate" / "nothing recorded".
//
// NOTE(review): lower-casing the entire URL also folds path and query case, so
// URLs differing only there share one hash - confirm that is acceptable.
// NOTE(review): recordUrl() writes canonical_url and occurrence_count; verify
// that the crawler_content_hashes migration defines both columns.
$duplicateDetectorContent = <<<'PHP'
<?php
declare(strict_types=1);
namespace FlowbotDCI\Services\Crawler;
use FlowbotDCI\Core\Database;
use PDO;

class DuplicateDetector {
    const VERSION = '1.0';
    private ?PDO $pdo = null;
    private array $seenUrls = [];
    private array $seenHashes = [];
    private array $trackingParams = ['utm_source', 'utm_medium', 'utm_campaign', 'utm_term', 'utm_content', 'ref', 'source', 'fbclid', 'gclid', 'msclkid', '_ga', 'mc_cid', 'mc_eid', 'trk', 'trkid'];

    public function setDatabase(Database $database): self { $this->pdo = $database->getConnection(); return $this; }

    public function normalizeUrl(string $url): string {
        $url = trim($url);
        $url = preg_replace('#^https?://#i', '', $url);
        $url = preg_replace('#^www\.#i', '', $url);
        if (strpos($url, '?') !== false) {
            [$path, $query] = explode('?', $url, 2);
            parse_str($query, $params);
            foreach ($this->trackingParams as $param) unset($params[$param]);
            ksort($params);
            $url = $path . (!empty($params) ? '?' . http_build_query($params) : '');
        }
        $url = rtrim($url, '/');
        return strtolower($url);
    }

    public function hashUrl(string $url): string { return md5($this->normalizeUrl($url)); }
    public function hashContent(string $content): string { $content = strip_tags($content); $content = preg_replace('/\s+/', ' ', $content); $content = trim(strtolower($content)); return md5($content); }

    public function isDuplicateUrl(string $url): bool {
        $hash = $this->hashUrl($url);
        if (isset($this->seenUrls[$hash])) return true;
        if ($this->pdo) {
            try {
                $stmt = $this->pdo->prepare("SELECT 1 FROM crawler_content_hashes WHERE url_hash = ? LIMIT 1");
                $stmt->execute([$hash]);
                if ($stmt->fetch()) { $this->seenUrls[$hash] = true; return true; }
            } catch (\Exception $e) {}
        }
        return false;
    }

    public function isDuplicateContent(string $content): bool {
        $hash = $this->hashContent($content);
        if (isset($this->seenHashes[$hash])) return true;
        if ($this->pdo) {
            try {
                $stmt = $this->pdo->prepare("SELECT 1 FROM crawler_content_hashes WHERE content_hash = ? LIMIT 1");
                $stmt->execute([$hash]);
                if ($stmt->fetch()) { $this->seenHashes[$hash] = true; return true; }
            } catch (\Exception $e) {}
        }
        return false;
    }

    public function recordUrl(string $url, string $content = '', string $title = ''): void {
        $urlHash = $this->hashUrl($url);
        $contentHash = !empty($content) ? $this->hashContent($content) : '';
        $titleHash = !empty($title) ? md5(strtolower(trim($title))) : '';
        $this->seenUrls[$urlHash] = true;
        if (!empty($contentHash)) $this->seenHashes[$contentHash] = true;
        if ($this->pdo) {
            try {
                $stmt = $this->pdo->prepare("INSERT INTO crawler_content_hashes (url_hash, content_hash, title_hash, canonical_url, first_seen, last_seen) VALUES (?, ?, ?, ?, NOW(), NOW()) ON DUPLICATE KEY UPDATE content_hash = VALUES(content_hash), last_seen = NOW(), occurrence_count = occurrence_count + 1");
                $stmt->execute([$urlHash, $contentHash, $titleHash, $url]);
            } catch (\Exception $e) {}
        }
    }

    public function existsInDatabase(string $url): bool { return $this->isDuplicateUrl($url); }
    public function clearMemoryCache(): void { $this->seenUrls = []; $this->seenHashes = []; }
    public function getStats(): array { return ['memory_urls' => count($this->seenUrls), 'memory_hashes' => count($this->seenHashes)]; }
}
PHP;
echo writeFile("$basePath/src/Services/Crawler/DuplicateDetector.php", $duplicateDetectorContent) . "\n";

// ContentExtractor.php
// Embedded class source, written verbatim (nowdoc: no interpolation) to
// src/Services/Crawler/ContentExtractor.php; the status line goes to the Step 2 log.
$contentExtractorContent = <<<'PHP'
<?php
declare(strict_types=1);
namespace FlowbotDCI\Services\Crawler;

/**
 * Regex-based extraction of page metadata (title, description, images,
 * OpenGraph / Twitter card tags, embeds) from raw HTML.
 */
class ContentExtractor {
    const VERSION = '1.0';
    private array $defaultThumbnails = ['https://via.placeholder.com/300x200?text=No+Image'];
    private array $embedPatterns = [
        'youtube' => ['pattern' => '#(?:youtube\.com/watch\?v=|youtu\.be/)([a-zA-Z0-9_-]+)#', 'embed' => 'https://www.youtube.com/embed/%s'],
        'vimeo' => ['pattern' => '#vimeo\.com/(\d+)#', 'embed' => 'https://player.vimeo.com/video/%s'],
        'tiktok' => ['pattern' => '#tiktok\.com/@[\w.-]+/video/(\d+)#', 'embed' => 'https://www.tiktok.com/embed/v2/%s'],
    ];

    /** Run every extractor and return the results keyed by field name. */
    public function extract(string $html, string $url): array {
        return [
            'title' => $this->extractTitle($html),
            'description' => $this->extractDescription($html),
            'thumbnail' => $this->extractThumbnail($html, $url),
            'favicon' => $this->extractFavicon($html, $url),
            'canonical' => $this->extractCanonical($html, $url),
            'author' => $this->extractAuthor($html),
            'published_date' => $this->extractPublishedDate($html),
            'opengraph' => $this->extractOpenGraph($html),
            'twitter_card' => $this->extractTwitterCard($html),
            'embed' => $this->generateEmbed($url),
            'images' => $this->extractImages($html, $url),
            'keywords' => $this->extractKeywords($html),
        ];
    }

    /** Title from og:title, then <title>, then the first <h1>; '' when none is found. */
    public function extractTitle(string $html): string {
        if (preg_match('/<meta[^>]+property=["\']og:title["\'][^>]+content=["\']([^"\']+)["\'][^>]*>/i', $html, $m)) return html_entity_decode(trim($m[1]), ENT_QUOTES | ENT_HTML5);
        if (preg_match('/<title[^>]*>([^<]+)<\/title>/i', $html, $m)) return html_entity_decode(trim($m[1]), ENT_QUOTES | ENT_HTML5);
        if (preg_match('/<h1[^>]*>([^<]+)<\/h1>/i', $html, $m)) return html_entity_decode(trim($m[1]), ENT_QUOTES | ENT_HTML5);
        return '';
    }

    /** Description from og:description, then meta description, then the first <p> of 50+ characters (cut to 300 bytes). */
    public function extractDescription(string $html): string {
        if (preg_match('/<meta[^>]+property=["\']og:description["\'][^>]+content=["\']([^"\']+)["\'][^>]*>/i', $html, $m)) return html_entity_decode(trim($m[1]), ENT_QUOTES | ENT_HTML5);
        if (preg_match('/<meta[^>]+name=["\']description["\'][^>]+content=["\']([^"\']+)["\'][^>]*>/i', $html, $m)) return html_entity_decode(trim($m[1]), ENT_QUOTES | ENT_HTML5);
        if (preg_match('/<p[^>]*>([^<]{50,})<\/p>/i', $html, $m)) return html_entity_decode(trim(substr($m[1], 0, 300)), ENT_QUOTES | ENT_HTML5);
        return '';
    }

    /** Thumbnail from og:image, then twitter:image, then the first <img>; falls back to the placeholder. */
    public function extractThumbnail(string $html, string $url): string {
        if (preg_match('/<meta[^>]+property=["\']og:image["\'][^>]+content=["\']([^"\']+)["\'][^>]*>/i', $html, $m)) return $this->resolveUrl($m[1], $url);
        if (preg_match('/<meta[^>]+name=["\']twitter:image["\'][^>]+content=["\']([^"\']+)["\'][^>]*>/i', $html, $m)) return $this->resolveUrl($m[1], $url);
        if (preg_match('/<img[^>]+src=["\']([^"\']+)["\'][^>]*>/i', $html, $m)) return $this->resolveUrl($m[1], $url);
        return $this->defaultThumbnails[0];
    }

    /** Favicon from a <link rel="icon"> (either attribute order), else "/favicon.ico" on the page's host. */
    public function extractFavicon(string $html, string $url): string {
        if (preg_match('/<link[^>]+rel=["\'](?:shortcut )?icon["\'][^>]+href=["\']([^"\']+)["\'][^>]*>/i', $html, $m)) return $this->resolveUrl($m[1], $url);
        if (preg_match('/<link[^>]+href=["\']([^"\']+)["\'][^>]+rel=["\'](?:shortcut )?icon["\'][^>]*>/i', $html, $m)) return $this->resolveUrl($m[1], $url);
        $parts = parse_url($url);
        return ($parts['scheme'] ?? 'https') . '://' . ($parts['host'] ?? '') . '/favicon.ico';
    }

    /** Canonical URL from <link rel="canonical">, then og:url, else the page URL itself. */
    public function extractCanonical(string $html, string $url): string {
        if (preg_match('/<link[^>]+rel=["\']canonical["\'][^>]+href=["\']([^"\']+)["\'][^>]*>/i', $html, $m)) return $this->resolveUrl($m[1], $url);
        if (preg_match('/<meta[^>]+property=["\']og:url["\'][^>]+content=["\']([^"\']+)["\'][^>]*>/i', $html, $m)) return $this->resolveUrl($m[1], $url);
        return $url;
    }

    /** Author from meta author, then article:author; '' when absent. */
    public function extractAuthor(string $html): string {
        if (preg_match('/<meta[^>]+name=["\']author["\'][^>]+content=["\']([^"\']+)["\'][^>]*>/i', $html, $m)) return trim($m[1]);
        if (preg_match('/<meta[^>]+property=["\']article:author["\'][^>]+content=["\']([^"\']+)["\'][^>]*>/i', $html, $m)) return trim($m[1]);
        return '';
    }

    /** Raw published date from article:published_time, then the first <time datetime>; null when absent. */
    public function extractPublishedDate(string $html): ?string {
        if (preg_match('/<meta[^>]+property=["\']article:published_time["\'][^>]+content=["\']([^"\']+)["\'][^>]*>/i', $html, $m)) return $m[1];
        if (preg_match('/<time[^>]+datetime=["\']([^"\']+)["\'][^>]*>/i', $html, $m)) return $m[1];
        return null;
    }

    /** All og:* meta tags as name => decoded content (later duplicates overwrite earlier ones). */
    public function extractOpenGraph(string $html): array {
        $og = [];
        preg_match_all('/<meta[^>]+property=["\']og:([^"\']+)["\'][^>]+content=["\']([^"\']*)["\'][^>]*>/i', $html, $matches, PREG_SET_ORDER);
        foreach ($matches as $m) $og[$m[1]] = html_entity_decode($m[2], ENT_QUOTES | ENT_HTML5);
        return $og;
    }

    /** All twitter:* meta tags as name => decoded content (later duplicates overwrite earlier ones). */
    public function extractTwitterCard(string $html): array {
        $tc = [];
        preg_match_all('/<meta[^>]+name=["\']twitter:([^"\']+)["\'][^>]+content=["\']([^"\']*)["\'][^>]*>/i', $html, $matches, PREG_SET_ORDER);
        foreach ($matches as $m) $tc[$m[1]] = html_entity_decode($m[2], ENT_QUOTES | ENT_HTML5);
        return $tc;
    }

    /** Embed descriptor (type, id, embed_url) when the URL matches a known video host; null otherwise. */
    public function generateEmbed(string $url): ?array {
        foreach ($this->embedPatterns as $type => $config) {
            if (preg_match($config['pattern'], $url, $m)) return ['type' => $type, 'id' => $m[1], 'embed_url' => sprintf($config['embed'], $m[1])];
        }
        return null;
    }

    /** Absolute URLs of the first 10 <img src> values, each wrapped as ['url' => ...]. */
    public function extractImages(string $html, string $url): array {
        $images = [];
        preg_match_all('/<img[^>]+src=["\']([^"\']+)["\'][^>]*>/i', $html, $matches);
        foreach (array_slice($matches[1], 0, 10) as $src) {
            $resolved = $this->resolveUrl($src, $url);
            if (!empty($resolved)) $images[] = ['url' => $resolved];
        }
        return $images;
    }

    /** Comma-separated meta keywords as a trimmed list; empty when the tag is absent. */
    public function extractKeywords(string $html): array {
        if (preg_match('/<meta[^>]+name=["\']keywords["\'][^>]+content=["\']([^"\']+)["\'][^>]*>/i', $html, $m)) return array_map('trim', explode(',', $m[1]));
        return [];
    }

    /**
     * Resolve a possibly relative reference against the page URL.
     *
     * Absolute http(s) URLs pass through, "//host/..." gets "https:", a
     * leading "/" is resolved against scheme://host[:port], and anything else
     * is appended to the directory of the base document.
     */
    private function resolveUrl(string $path, string $baseUrl): string {
        if (empty($path)) return '';
        if (preg_match('#^https?://#i', $path)) return $path;
        if (str_starts_with($path, '//')) return 'https:' . $path;
        $parts = parse_url($baseUrl);
        $base = ($parts['scheme'] ?? 'https') . '://' . ($parts['host'] ?? '');
        // Keep a non-default port so the resolved URL targets the same origin.
        if (isset($parts['port'])) $base .= ':' . $parts['port'];
        if (str_starts_with($path, '/')) return $base . $path;
        // Directory of the base document = everything up to and including the
        // last "/". dirname() is unsuitable here: it yields "/" for root-level
        // pages (producing "//file") and drops the last segment of "/dir/".
        $basePath = $parts['path'] ?? '/';
        $lastSlash = strrpos($basePath, '/');
        $directory = $lastSlash === false ? '/' : substr($basePath, 0, $lastSlash + 1);
        return $base . $directory . $path;
    }
}
PHP;
echo writeFile("$basePath/src/Services/Crawler/ContentExtractor.php", $contentExtractorContent) . "\n";

// Close the Step 2 log, then open the log section of
// Step 3: Database Migration
echo "</pre>", "<h2>Step 3: Running Database Migration</h2><pre>";

// Schema migration for the v7.0 unified crawler, held in a nowdoc so nothing
// inside it is interpolated. It contains five statements separated by ';':
//   1. ALTER TABLE crawler_jobs     - adds mode, priority, max_pages, max_depth,
//                                     parallel_count, same_domain_only,
//                                     relevance_threshold and session_id
//   2. crawler_domain_stats         - one row per domain (UNIQUE domain)
//   3. crawler_content_hashes       - one row per url_hash (UNIQUE url_hash)
//   4. crawler_metrics              - one row per (metric_date, metric_hour)
//   5. crawler_checkpoints          - one row per job_id (UNIQUE job_id)
// The execution code further down splits this text on ';', so no statement
// may contain a literal semicolon.
// NOTE(review): "ADD COLUMN IF NOT EXISTS" looks like MariaDB syntax; stock
// MySQL may reject it - confirm against the target server.
$migrationSql = <<<'SQL'
-- Unified Crawler Schema v7.0
-- Alter crawler_jobs table
ALTER TABLE crawler_jobs
ADD COLUMN IF NOT EXISTS mode ENUM('deep','search','sitemap','infinite','hybrid') DEFAULT 'deep' AFTER id,
ADD COLUMN IF NOT EXISTS priority TINYINT DEFAULT 5 AFTER mode,
ADD COLUMN IF NOT EXISTS max_pages INT UNSIGNED DEFAULT 100 AFTER priority,
ADD COLUMN IF NOT EXISTS max_depth INT UNSIGNED DEFAULT 3 AFTER max_pages,
ADD COLUMN IF NOT EXISTS parallel_count INT UNSIGNED DEFAULT 5 AFTER max_depth,
ADD COLUMN IF NOT EXISTS same_domain_only BOOLEAN DEFAULT TRUE AFTER parallel_count,
ADD COLUMN IF NOT EXISTS relevance_threshold FLOAT DEFAULT 2.0 AFTER same_domain_only,
ADD COLUMN IF NOT EXISTS session_id VARCHAR(64) AFTER relevance_threshold;

-- Domain statistics table
CREATE TABLE IF NOT EXISTS crawler_domain_stats (
    id INT UNSIGNED AUTO_INCREMENT PRIMARY KEY,
    domain VARCHAR(255) NOT NULL UNIQUE,
    total_visits INT UNSIGNED DEFAULT 0,
    successful_visits INT UNSIGNED DEFAULT 0,
    failed_visits INT UNSIGNED DEFAULT 0,
    avg_response_time FLOAT DEFAULT 0,
    last_visit DATETIME,
    last_http_code SMALLINT UNSIGNED,
    robots_txt_cached TEXT,
    robots_txt_expires DATETIME,
    crawl_delay INT UNSIGNED DEFAULT 0,
    is_blocked BOOLEAN DEFAULT FALSE,
    block_reason VARCHAR(255),
    created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
    updated_at DATETIME DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
    INDEX idx_domain (domain),
    INDEX idx_blocked (is_blocked)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;

-- Content hash table
CREATE TABLE IF NOT EXISTS crawler_content_hashes (
    id INT UNSIGNED AUTO_INCREMENT PRIMARY KEY,
    url_hash VARCHAR(64) NOT NULL UNIQUE,
    content_hash VARCHAR(64) NOT NULL,
    title_hash VARCHAR(64),
    first_seen DATETIME DEFAULT CURRENT_TIMESTAMP,
    last_seen DATETIME DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
    occurrence_count INT UNSIGNED DEFAULT 1,
    canonical_url TEXT,
    INDEX idx_content (content_hash),
    INDEX idx_title (title_hash)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;

-- Metrics table
CREATE TABLE IF NOT EXISTS crawler_metrics (
    id INT UNSIGNED AUTO_INCREMENT PRIMARY KEY,
    metric_date DATE NOT NULL,
    metric_hour TINYINT UNSIGNED,
    total_jobs INT UNSIGNED DEFAULT 0,
    completed_jobs INT UNSIGNED DEFAULT 0,
    failed_jobs INT UNSIGNED DEFAULT 0,
    total_urls_crawled INT UNSIGNED DEFAULT 0,
    total_urls_processed INT UNSIGNED DEFAULT 0,
    total_errors INT UNSIGNED DEFAULT 0,
    avg_job_duration FLOAT,
    avg_urls_per_job FLOAT,
    avg_response_time FLOAT,
    unique_domains INT UNSIGNED DEFAULT 0,
    created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
    UNIQUE KEY idx_date_hour (metric_date, metric_hour)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;

-- Checkpoints table
CREATE TABLE IF NOT EXISTS crawler_checkpoints (
    id INT UNSIGNED AUTO_INCREMENT PRIMARY KEY,
    job_id VARCHAR(50) NOT NULL UNIQUE,
    checkpoint_data LONGTEXT,
    created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
    INDEX idx_job (job_id)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
SQL;

// Connect to the configured database and apply the migration one statement at
// a time, so a failing statement does not stop the remaining ones from being
// attempted. Failures are counted so the closing message reflects the outcome.
try {
    $dsn = sprintf('mysql:host=%s;port=%d;dbname=%s;charset=utf8mb4', $dbConfig['host'], $dbConfig['port'], $dbConfig['database']);
    $pdo = new PDO($dsn, $dbConfig['username'], $dbConfig['password'], [PDO::ATTR_ERRMODE => PDO::ERRMODE_EXCEPTION]);

    // Driver error codes that mean "the object is already there":
    // 1050 table exists, 1060 duplicate column name, 1061 duplicate key name.
    // Checked in addition to the message text because server messages can be localised.
    $alreadyExistsCodes = [1050, 1060, 1061];
    $failedStatements = 0;

    // Execute each statement. The script is split naively on ';', which is
    // only safe because no statement contains a literal semicolon.
    $statements = array_filter(explode(';', $migrationSql));
    foreach ($statements as $stmt) {
        $stmt = trim($stmt);
        if ($stmt === '') {
            continue;
        }

        // Build a one-line, HTML-safe preview without the leading "--" comment
        // lines, so the log shows the statement itself rather than its comment.
        $preview = preg_replace('/^\s*--.*$/m', '', $stmt) ?? $stmt;
        $preview = preg_replace('/\s+/', ' ', trim($preview)) ?? $preview;
        if ($preview === '') {
            continue; // the chunk held only comments - nothing to execute
        }
        $preview = htmlspecialchars(substr($preview, 0, 60), ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');

        try {
            $pdo->exec($stmt);
            echo "<span class='success'>✓</span> Executed: " . $preview . "...\n";
        } catch (PDOException $e) {
            $message = $e->getMessage();
            $driverCode = (int) ($e->errorInfo[1] ?? 0);
            $alreadyExists = in_array($driverCode, $alreadyExistsCodes, true)
                || strpos($message, 'Duplicate column') !== false
                || strpos($message, 'already exists') !== false;

            if ($alreadyExists) {
                echo "<span class='info'>ℹ</span> Skipped (exists): " . $preview . "...\n";
            } else {
                $failedStatements++;
                // Escape the driver message: it is printed into an HTML page.
                echo "<span class='error'>✗</span> Error: " . htmlspecialchars($message, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8') . "\n";
            }
        }
    }

    if ($failedStatements === 0) {
        echo "\n<span class='success'>Database migration completed!</span>\n";
    } else {
        echo "\n<span class='error'>Database migration finished with {$failedStatements} failed statement(s) - review the errors above.</span>\n";
    }
} catch (PDOException $e) {
    echo "<span class='error'>Database connection failed: " . htmlspecialchars($e->getMessage(), ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8') . "</span>\n";
}

echo "</pre>";

// Step 4: Create View and Assets
echo "<h2>Step 4: Creating View & Assets</h2><pre>";

// Create crawler-unified.php view (simplified version)
// Reuse an existing view file when one is readable at ../views relative to
// this installer. file_get_contents() returns false (not null) on failure and
// emits a warning for a missing file, so "?? ''" never applied: check the file
// first and normalise a failed read to ''. An empty result makes the code that
// follows fall back to the built-in template.
$existingViewPath = __DIR__ . '/../views/crawler-unified.php';
$viewContent = (is_file($existingViewPath) && is_readable($existingViewPath))
    ? file_get_contents($existingViewPath)
    : '';
if ($viewContent === false) {
    $viewContent = '';
}
// No usable view content was loaded: fall back to the built-in minimal
// template. The nowdoc keeps the embedded PHP tags literal, so they are
// written to the view file as-is instead of being evaluated here.
if (!$viewContent) {
    $viewContent = <<<'HTML'
<?php $pageTitle = 'Unified Crawler'; ?>
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title><?= htmlspecialchars($pageTitle) ?> - Flowb0t DCI v7.0</title>
    <link rel="stylesheet" href="/assets/css/v7-crawler.css">
    <link href="https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700&family=JetBrains+Mono:wght@400;500&display=swap" rel="stylesheet">
    <script src="https://cdn.jsdelivr.net/npm/chart.js"></script>
</head>
<body>
    <div class="app-container">
        <header class="app-header">
            <div class="header-left">
                <h1 class="app-logo"><span class="logo-icon">🚀</span> Flowb0t <span class="version">v7.0</span></h1>
                <nav class="main-nav">
                    <a href="/" class="nav-link">Dashboard</a>
                    <a href="/new" class="nav-link">Link Processor</a>
                    <a href="/crawler/unified" class="nav-link active">Unified Crawler</a>
                </nav>
            </div>
            <div class="header-right"><span class="status-badge online">System Online</span></div>
        </header>
        <main class="main-content">
            <div class="content-grid">
                <section class="panel input-panel">
                    <div class="panel-header"><h2 class="panel-title"><span class="icon">🔍</span> Crawler Configuration</h2></div>
                    <div class="panel-body">
                        <form id="crawlerForm" class="crawler-form">
                            <div class="form-group">
                                <label class="form-label">Crawl Mode</label>
                                <div class="mode-selector">
                                    <button type="button" class="mode-btn active" data-mode="search"><span class="mode-icon">🔎</span><span class="mode-name">Search</span></button>
                                    <button type="button" class="mode-btn" data-mode="deep"><span class="mode-icon">🕸️</span><span class="mode-name">Deep Crawl</span></button>
                                    <button type="button" class="mode-btn" data-mode="sitemap"><span class="mode-icon">🗺️</span><span class="mode-name">Sitemap</span></button>
                                    <button type="button" class="mode-btn" data-mode="hybrid"><span class="mode-icon">⚡</span><span class="mode-name">Hybrid</span></button>
                                </div>
                                <input type="hidden" name="mode" id="crawlMode" value="search">
                            </div>
                            <div class="form-group" id="searchTermsGroup">
                                <label class="form-label" for="searchTerms">Search Terms</label>
                                <textarea id="searchTerms" name="search_terms" class="form-textarea" rows="3" placeholder="Enter search terms (one per line)"></textarea>
                            </div>
                            <div class="form-group" id="seedUrlsGroup" style="display:none;">
                                <label class="form-label" for="seedUrls">Seed URLs</label>
                                <textarea id="seedUrls" name="seed_urls" class="form-textarea" rows="3" placeholder="Enter URLs to crawl (one per line)"></textarea>
                            </div>
                            <div class="form-group" id="searchEnginesGroup">
                                <label class="form-label">Search Engines</label>
                                <div class="chip-group">
                                    <label class="chip active"><input type="checkbox" name="engines[]" value="bing" checked><span class="chip-icon">🔵</span> Bing</label>
                                    <label class="chip active"><input type="checkbox" name="engines[]" value="yahoo" checked><span class="chip-icon">🟣</span> Yahoo</label>
                                    <label class="chip active"><input type="checkbox" name="engines[]" value="duckduckgo" checked><span class="chip-icon">🦆</span> DuckDuckGo</label>
                                </div>
                            </div>
                            <div class="form-actions">
                                <button type="submit" class="btn btn-primary btn-lg" id="startBtn"><span class="btn-icon">▶</span> Start Crawl</button>
                                <button type="button" class="btn btn-secondary" id="pauseBtn" disabled><span class="btn-icon">⏸</span> Pause</button>
                                <button type="button" class="btn btn-danger" id="stopBtn" disabled><span class="btn-icon">⏹</span> Stop</button>
                            </div>
                        </form>
                    </div>
                </section>
                <section class="panel progress-panel">
                    <div class="panel-header"><h2 class="panel-title"><span class="icon">📊</span> Progress</h2><span class="job-id" id="currentJobId">-</span></div>
                    <div class="panel-body">
                        <div class="progress-ring-container">
                            <svg class="progress-ring" viewBox="0 0 120 120">
                                <circle class="progress-ring-bg" cx="60" cy="60" r="54" />
                                <circle class="progress-ring-fill" cx="60" cy="60" r="54" stroke-dasharray="339.292" stroke-dashoffset="339.292" />
                            </svg>
                            <div class="progress-text"><span class="progress-value" id="progressValue">0</span><span class="progress-unit">%</span></div>
                        </div>
                        <div class="stats-grid">
                            <div class="stat-card"><span class="stat-value" id="statProcessed">0</span><span class="stat-label">Processed</span></div>
                            <div class="stat-card success"><span class="stat-value" id="statImported">0</span><span class="stat-label">Imported</span></div>
                            <div class="stat-card warning"><span class="stat-value" id="statIgnored">0</span><span class="stat-label">Ignored</span></div>
                            <div class="stat-card error"><span class="stat-value" id="statErrors">0</span><span class="stat-label">Errors</span></div>
                        </div>
                        <div class="time-stats">
                            <div class="time-stat"><span class="time-label">Elapsed</span><span class="time-value" id="elapsedTime">00:00:00</span></div>
                            <div class="time-stat"><span class="time-label">ETA</span><span class="time-value" id="etaTime">--:--:--</span></div>
                            <div class="time-stat"><span class="time-label">Rate</span><span class="time-value"><span id="processingRate">0</span> /s</span></div>
                        </div>
                        <div class="status-container"><span class="status-indicator" id="statusIndicator"><span class="status-dot"></span><span class="status-text">Ready</span></span></div>
                    </div>
                </section>
                <section class="panel logs-panel">
                    <div class="panel-header"><h2 class="panel-title"><span class="icon">📜</span> Live Logs</h2></div>
                    <div class="panel-body">
                        <div class="terminal" id="logTerminal">
                            <div class="terminal-content" id="logContent">
                                <div class="log-line info"><span class="log-time">[--:--:--]</span><span class="log-msg">Ready to start crawling...</span></div>
                            </div>
                        </div>
                    </div>
                </section>
                <section class="panel domains-panel">
                    <div class="panel-header"><h2 class="panel-title"><span class="icon">🌐</span> Domain Statistics</h2></div>
                    <div class="panel-body">
                        <div class="chart-container"><canvas id="domainsChart"></canvas></div>
                        <div class="domains-table-container">
                            <table class="domains-table" id="domainsTable">
                                <thead><tr><th>Domain</th><th>Visits</th><th>Success</th><th>Rate</th></tr></thead>
                                <tbody><tr class="empty-row"><td colspan="4">No domain data yet</td></tr></tbody>
                            </table>
                        </div>
                    </div>
                </section>
            </div>
        </main>
        <footer class="app-footer"><span>Flowb0t DCI v7.0 - Unified Professional Crawler</span><span class="memory-usage">Memory: <span id="memoryUsage">0</span> MB</span></footer>
    </div>
    <div class="toast-container" id="toastContainer"></div>
    <script src="/assets/js/crawler-unified.js"></script>
</body>
</html>
HTML;
}

// Write the view under the base path's views directory and print writeFile()'s status line.
echo writeFile($basePath . '/views/crawler-unified.php', $viewContent), "\n";

// Close the <pre> block of step 4.
echo "</pre>";

// Final summary: headline, short description and the two navigation buttons.
echo "<h2 class='success'>✓ Installation Complete!</h2>",
    "<p>The Unified Crawler v7.0 has been installed successfully.</p>",
    "<p><strong>Access your new crawler at:</strong></p>",
    "<a href='/crawler/unified' class='btn'>Open Unified Crawler</a>",
    "<a href='/' class='btn'>Back to Dashboard</a>";

// Static list of the files this installer is expected to have written.
echo "<h3>Files Created:</h3><ul>";
foreach ([
    'src/Services/Crawler/InfiniteMode.php',
    'src/Services/Crawler/RobotsHandler.php',
    'src/Services/Crawler/SitemapParser.php',
    'src/Services/Crawler/RelevanceScorer.php',
    'src/Services/Crawler/DuplicateDetector.php',
    'src/Services/Crawler/ContentExtractor.php',
    'views/crawler-unified.php',
] as $installedFile) {
    echo "<li>" . $installedFile . "</li>";
}
echo "</ul>";

// Security reminder plus a link that re-requests this script with ?delete_installer=1.
echo "<p class='info'><strong>Note:</strong> You should delete this installer file after use for security.</p>",
    "<p><a href='?delete_installer=1' class='btn btn-danger' onclick='return confirm(\"Delete installer?\")'>Delete Installer</a></p>";

// Handle self-deletion
// NOTE(review): this check sits at the end of the script, after the migration
// and view steps above, so a ?delete_installer=1 request runs those steps
// again before the file is removed. It is also a plain GET link.
if (isset($_GET['delete_installer'])) {
    // unlink() returns false on failure (e.g. missing write permission on the
    // directory), so only report success and redirect when the file is gone.
    if (unlink(__FILE__)) {
        echo "<p class='success'>Installer deleted!</p>";
        echo "<script>setTimeout(function(){ window.location.href = '/crawler/unified'; }, 2000);</script>";
    } else {
        echo "<p class='error'>Could not delete the installer. Please remove this file manually.</p>";
    }
}

// Close the page wrapper elements.
echo "</div></body></html>";
