<?php
/************************************************************
 * FLOWBOT DCI v7.0 - CRAWLER PROFISSIONAL UNIFICADO
 * -------------------------------------------------------------
 * Versão completa com todas as funcionalidades:
 * - Multi-buscadores (Bing, Yandex, Baidu, Yahoo Japan, DuckDuckGo)
 * - cURL Multi para alta concorrência
 * - Sistema de 4 fases de processamento
 * - Deep crawling com profundidade configurável
 * - Modo infinito
 * - Gerenciador completo (pause, resume, stop, edit, reiniciar)
 * - Relevância e score
 * - Domínios forçados
 * - Termo complementar
 * - Design profissional dark theme
 ************************************************************/

// Use UTF-8 as the default encoding for every mb_* call in this script.
mb_internal_encoding("UTF-8");
// Report all error levels.
error_reporting(E_ALL);
// Keep the script running even if the client disconnects.
ignore_user_abort(true);
// No execution time limit.
set_time_limit(0);
// Raise the PHP memory limit to 4096 MB.
ini_set('memory_limit', '4096M');

// Base URL used to build navigation links
define('BASE_URL', '/Flowb0t_DCI/v2');

/**********************************************
 * 1) CONFIGURAÇÕES DE BANCO
 **********************************************/
// NOTE(review): credentials are hard-coded in source; consider loading them from
// configuration kept outside version control.
$host    = 'localhost';
$db      = 'digupdog_FEED';
$user    = 'digupdog_FEEDadmin';
$pass    = 'Raimundinho1';
$charset = 'utf8mb4';

// Throw on errors, fetch associative rows, use native prepared statements.
$options = [
    PDO::ATTR_ERRMODE            => PDO::ERRMODE_EXCEPTION,
    PDO::ATTR_DEFAULT_FETCH_MODE => PDO::FETCH_ASSOC,
    PDO::ATTR_EMULATE_PREPARES   => false,
];

try {
    $pdo = new PDO("mysql:host=$host;dbname=$db;charset=$charset", $user, $pass, $options);
} catch (Exception $e) {
    // The driver message can reveal host, schema and account names, so it is written
    // to the server log only; the client receives a generic failure message.
    error_log('Flowbot DCI: database connection failed: ' . $e->getMessage());
    if (!headers_sent()) {
        http_response_code(500);
    }
    die("Falha na conexão com o banco de dados.");
}

/**********************************************
 * Setup tabela de links vistos
 **********************************************/
/**
 * Creates the `crawler_seen_links_pro` table when it does not exist yet.
 *
 * The table stores one row per (process_id, link, depth) and carries a
 * prefix index on process_id.
 *
 * @param PDO $pdo Open database connection used to run the DDL.
 */
function setupCrawlerSeenLinks(PDO $pdo) {
    $pdo->exec("CREATE TABLE IF NOT EXISTS `crawler_seen_links_pro` (
        `id` BIGINT AUTO_INCREMENT PRIMARY KEY,
        `process_id` VARCHAR(255) NOT NULL,
        `link` TEXT NOT NULL,
        `depth` INT NOT NULL,
        INDEX (process_id(191))
    ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;");
}
// Make sure the seen-links table exists on every request.
setupCrawlerSeenLinks($pdo);

// Name of the table that records the links already seen by each process.
define('TABLE_SEEN_LINKS', 'crawler_seen_links_pro');

/**********************************************
 * Domínios forçados (ignorar relevância)
 **********************************************/
// Forced domains: relevance is ignored for these hosts.
// Presumably passed as $forcedDomains to isRelevant() - confirm at the call site.
$FORCED_DOMAINS = [
    'nypost.com',
    'cnn.com',
    'bbc.com',
    'reuters.com',
];

/**********************************************
 * 2) FUNÇÕES AUXILIARES
 **********************************************/
/**
 * Validates a URL, retrying once with an "https://" prefix.
 *
 * @param string $url Candidate URL, with or without a scheme.
 * @return string|false The input when it is already valid, the "https://"-prefixed
 *                      form when only that validates, or false when neither does.
 */
function isValidUrl($url) {
    if (filter_var($url, FILTER_VALIDATE_URL)) {
        return $url;
    }

    // Leading slashes are dropped so "//host/path" becomes "https://host/path".
    $withScheme = 'https://' . ltrim($url, '/');

    return filter_var($withScheme, FILTER_VALIDATE_URL) ? $withScheme : false;
}

/**
 * Normalizes an href taken from a page.
 *
 * Redirect-style links of the form "/url?q=<target>&..." are unwrapped: the
 * target is cut at the first "&" (the remaining parameters belong to the
 * redirector) and URL-decoded. Any other URL is returned trimmed but otherwise
 * untouched, so ordinary query strings such as "?a=1&b=2" are preserved.
 *
 * @param string $url Raw href value.
 * @return string Cleaned URL (possibly empty).
 */
function cleanUrl($url) {
    $url = trim((string) $url);

    if (strpos($url, '/url?q=') !== 0) {
        return $url;
    }

    // Strip the "/url?q=" prefix (7 characters) and drop the redirector's own parameters.
    $target = substr($url, 7);
    $amp = strpos($target, '&');
    if ($amp !== false) {
        $target = substr($target, 0, $amp);
    }

    // The wrapped target is percent-encoded inside the q parameter.
    return trim(urldecode($target));
}

/**
 * Tells whether a link can be discarded without fetching it.
 *
 * A link is ignored when it is a mailto:/javascript:/tel: pseudo-link, a pure
 * fragment ("#..."), or when the extension of its path is a known
 * document/archive/binary type. Only the path is inspected, so a host such as
 * "docs.example.com" is not mistaken for a ".doc" file.
 *
 * @param string $url Absolute or relative URL.
 * @return bool True when the link should be skipped.
 */
function shouldIgnoreLinkImmediately($url) {
    $lower = strtolower((string) $url);

    foreach (['mailto:', 'javascript:', 'tel:', '#'] as $prefix) {
        if (strpos($lower, $prefix) === 0) return true;
    }

    $blocked = ['pdf', 'doc', 'docx', 'xls', 'xlsx', 'ppt', 'pptx', 'zip', 'rar', '7z', 'exe', 'dmg', 'iso'];

    $path = parse_url($lower, PHP_URL_PATH);
    if (!is_string($path)) {
        // parse_url() gave up on a malformed URL: fall back to dropping query/fragment by hand.
        $path = (string) preg_replace('/[?#].*$/s', '', $lower);
    }
    if ($path === '') return false;

    $ext = pathinfo($path, PATHINFO_EXTENSION);

    return $ext !== '' && in_array($ext, $blocked, true);
}

/**
 * Tells whether a URL points at one of the known search engines.
 *
 * An engine domain matches only on DNS label boundaries: it must start at the
 * beginning of the host or after a dot, and end at the end of the host or before
 * a dot. "www.bing.com" and "www.google.com.br" match; "climbing.com" (which
 * merely contains the text "bing.com") does not.
 *
 * @param string $url URL to test.
 * @return bool True when the host belongs to a listed search engine.
 */
function isSearchEngineUrl($url) {
    $engines = ['bing.com', 'google.com', 'yahoo.com', 'duckduckgo.com', 'yandex.com', 'baidu.com', 'yahoo.co.jp'];
    $host = parse_url($url, PHP_URL_HOST);
    if (!$host) return false;

    foreach ($engines as $e) {
        $pattern = '/(^|\.)' . preg_quote($e, '/') . '(\.|$)/i';
        if (preg_match($pattern, $host) === 1) return true;
    }
    return false;
}

/**********************************************
 * 2.1) Funções de relevância
 **********************************************/
/**
 * Checks the first <meta name="robots"> tag for "noindex" or "nofollow".
 *
 * Only the first matching meta element is inspected; its content attribute is
 * compared case-insensitively.
 *
 * @param DOMDocument $dom Parsed page.
 * @return bool True when either directive is present.
 */
function hasNoindexNofollow(DOMDocument $dom) {
    $robotsTags = (new DOMXPath($dom))->query('//meta[@name="robots"]');
    if (!($robotsTags->length > 0)) {
        return false;
    }

    $directives = strtolower($robotsTags->item(0)->getAttribute('content'));
    foreach (['noindex', 'nofollow'] as $flag) {
        if (strpos($directives, $flag) !== false) {
            return true;
        }
    }
    return false;
}

/**
 * Scores a page against a list of search terms.
 *
 * Each occurrence of a term (case-insensitive) adds a weight that depends on
 * where it appears: title 4, meta description 3, h1 2, h2 1.5, paragraph 1.
 * Blank terms are skipped; substr_count() rejects an empty needle, so a stray
 * empty string in the list would otherwise abort scoring.
 *
 * @param DOMDocument $dom         Parsed page.
 * @param array       $searchTerms Terms to look for.
 * @return int|float 999 when no terms are given, otherwise the weighted count.
 */
function calculateRelevanceScore(DOMDocument $dom, array $searchTerms) {
    if (empty($searchTerms)) return 999;

    $xp = new DOMXPath($dom);

    $tn = $xp->query('//title');
    $titleText = $tn->length ? mb_strtolower($tn->item(0)->textContent) : '';

    $dn = $xp->query('//meta[@name="description"]');
    $descText = $dn->length ? mb_strtolower($dn->item(0)->getAttribute('content')) : '';

    $h1Text = '';
    foreach ($xp->query('//h1') as $h1) $h1Text .= ' ' . mb_strtolower($h1->textContent);

    $h2Text = '';
    foreach ($xp->query('//h2') as $h2) $h2Text .= ' ' . mb_strtolower($h2->textContent);

    $pText = '';
    foreach ($xp->query('//p') as $p) $pText .= ' ' . mb_strtolower($p->textContent);

    $score = 0;
    $wTitle = 4; $wDesc = 3; $wH1 = 2; $wH2 = 1.5; $wP = 1;

    foreach ($searchTerms as $term) {
        $term = (string) $term;
        // An empty or whitespace-only needle carries no meaning and would make substr_count() fail.
        if (trim($term) === '') continue;

        $t = mb_strtolower($term);
        $score += substr_count($titleText, $t) * $wTitle;
        $score += substr_count($descText, $t) * $wDesc;
        $score += substr_count($h1Text, $t) * $wH1;
        $score += substr_count($h2Text, $t) * $wH2;
        $score += substr_count($pText, $t) * $wP;
    }
    return $score;
}

/**
 * Decides whether a fetched page is relevant to the search terms.
 *
 * Order of checks:
 *  1. Hosts that equal a forced domain, or are a subdomain of one, are always relevant.
 *  2. Search-engine URLs are never relevant.
 *  3. Pages whose robots meta carries noindex/nofollow are not relevant.
 *  4. Otherwise the relevance score must reach $threshold.
 *
 * Forced domains are matched as exact host or ".domain" suffix, so
 * "edition.cnn.com" is forced while "fakecnn.com" or "cnn.com.evil.org" are not.
 *
 * @param DOMDocument $dom           Parsed page.
 * @param string      $url           Page URL.
 * @param array       $searchTerms   Terms used for scoring.
 * @param int|float   $threshold     Minimum score to be considered relevant.
 * @param array       $forcedDomains Domains that bypass scoring.
 * @return bool
 */
function isRelevant(DOMDocument $dom, $url, array $searchTerms, $threshold = 2, array $forcedDomains = []) {
    $host = parse_url($url, PHP_URL_HOST);
    if ($host) {
        $lc = strtolower(rtrim($host, '.'));
        foreach ($forcedDomains as $fd) {
            $fd = strtolower(trim((string) $fd, ". \t\n\r"));
            // An empty entry must not force every host.
            if ($fd === '') continue;

            $suffix = '.' . $fd;
            if ($lc === $fd || substr($lc, -strlen($suffix)) === $suffix) return true;
        }
    }
    if (isSearchEngineUrl($url)) return false;
    if (hasNoindexNofollow($dom)) return false;
    $score = calculateRelevanceScore($dom, $searchTerms);
    return ($score >= $threshold);
}

/**********************************************
 * 3) FUNÇÕES P/ pinfeeds / user_myhashtag
 **********************************************/
/**
 * Returns the ID of the user_myhashtag row for $author, creating it when missing.
 *
 * A new account gets first/last name from the first two space-separated words
 * of $author (last name defaults to "Bot"), an e-mail built from the author
 * name with whitespace removed at digupdog.com, status 'active' and role 'user'.
 * The stored password hash is derived from a random secret that is discarded,
 * so auto-created accounts do not share a known password.
 *
 * @param PDO    $pdo    Open database connection.
 * @param string $author Username to look up or create.
 * @return mixed Existing ID column value, or the last insert id for a new row.
 */
function getOrCreateUserId(PDO $pdo, $author) {
    $st = $pdo->prepare("SELECT ID FROM user_myhashtag WHERE username=? LIMIT 1");
    $st->execute([$author]);
    $row = $st->fetchColumn();
    if ($row) return $row;

    $arr = explode(' ', $author);
    $f = ucfirst($arr[0]);
    $l = ucfirst($arr[1] ?? 'Bot');
    $email = strtolower(preg_replace('/\s+/', '', $author)) . '@digupdog.com';
    // 24 random bytes -> 48 hex chars, below bcrypt's 72-byte input limit.
    $pass = password_hash(bin2hex(random_bytes(24)), PASSWORD_DEFAULT);

    $st2 = $pdo->prepare("
        INSERT INTO user_myhashtag(username, email, senha, first_name, last_name, created_at, status, user_role)
        VALUES(?, ?, ?, ?, ?, NOW(), 'active', 'user')
    ");
    $st2->execute([$author, $email, $pass, $f, $l]);
    return $pdo->lastInsertId();
}

/**
 * Picks one author name at random from a fixed pool.
 *
 * @return string One of the predefined author names.
 */
function generateRandomAuthor() {
    $pool = [
        "Carlos Dias", "Mariana Silva", "João Santos", "Ana Monteiro",
        "Pedro Correia", "Maria Almeida", "Sofia Nunes", "Rodrigo Azevedo",
        "Luciana Araújo", "Felipe Santana", "Crawler Bot", "Auto Import"
    ];
    $pick = array_rand($pool);

    return $pool[$pick];
}

/**
 * Derives tag candidates from a title.
 *
 * Punctuation is removed, the text is lower-cased and split on single spaces;
 * stop words (English plus a few Portuguese ones) and words of two characters
 * or fewer are dropped. Order is preserved and duplicates are kept.
 *
 * @param string $title Page title.
 * @return array List of remaining words, re-indexed from 0.
 */
function extractTagsFromTitle($title) {
    $stripped = preg_replace("/[.,\/#!\$%\^&\*;:{}=\-_~()\[\]\"']/", "", $title);
    $tokens = explode(' ', mb_strtolower($stripped));

    $stopWords = ["a", "an", "the", "and", "or", "but", "in", "at", "on", "with", "to", "for", "is", "of",
                  "that", "it", "by", "from", "as", "are", "was", "be", "has", "have", "will", "this",
                  "which", "its", "about", "up", "more", "who", "also", "they", "out", "he", "she",
                  "you", "their", "we", "her", "his", "them", "been", "these", "would", "some", "can",
                  "like", "there", "if", "all", "my", "what", "so", "then", "into", "just", "over",
                  "do", "than", "when", "other", "how", "our", "any", "new", "me", "after", "most",
                  "made", "only", "time", "where", "year", "years", "make", "does", "could", "were",
                  "your", "good", "well", "que", "uma", "com", "para", "das", "como", "de", "e", "o"];

    $tags = [];
    foreach ($tokens as $token) {
        if (in_array($token, $stopWords)) {
            continue;
        }
        if (mb_strlen($token) > 2) {
            $tags[] = $token;
        }
    }
    return $tags;
}

/**********************************************
 * 3.1) Extração de meta/og/thumbnail
 **********************************************/
/**
 * Extracts a page title.
 *
 * Tries, in order, the og:title meta content, the <title> element and the
 * first <h1>; the first non-empty trimmed value wins.
 *
 * @param DOMXPath $xp XPath bound to the parsed page.
 * @return string The title, or 'No title' when none is found.
 */
function getTitle(DOMXPath $xp) {
    $candidates = ['//meta[@property="og:title"]/@content', '//title', '//h1'];

    foreach ($candidates as $expression) {
        $matches = $xp->query($expression);
        if ($matches->length === 0) {
            continue;
        }
        $text = trim($matches->item(0)->nodeValue);
        if ($text) {
            return $text;
        }
    }
    return 'No title';
}

/**
 * Extracts a page description of at most 500 characters.
 *
 * Tries, in order, the og:description meta content, the description meta
 * content and the first <p>; the first non-empty trimmed value wins. The value
 * is cut by characters rather than bytes, so a multibyte UTF-8 character is
 * never split in half.
 *
 * @param DOMXPath $xp XPath bound to the parsed page.
 * @return string The description, or 'No description' when none is found.
 */
function getDescription(DOMXPath $xp) {
    $queries = [
        '//meta[@property="og:description"]/@content',
        '//meta[@name="description"]/@content',
        '//p'
    ];
    foreach ($queries as $q) {
        $n = $xp->query($q);
        if ($n->length > 0) {
            $val = trim($n->item(0)->nodeValue);
            // DOM returns UTF-8, so truncate on character boundaries.
            if ($val) return mb_substr($val, 0, 500, 'UTF-8');
        }
    }
    return 'No description';
}

/**
 * Finds the best thumbnail URL for a page.
 *
 * Candidates are tried in priority order (Open Graph, Twitter card,
 * apple-touch-icon, common WordPress image classes, figure images, video
 * poster, any image). For each candidate:
 *  - an empty value is skipped;
 *  - a protocol-relative value ("//host/path") inherits the scheme of $baseURL
 *    (https when the base has none);
 *  - any other value without a scheme is appended to $baseURL;
 *  - the first result that validates as a URL is returned.
 *
 * @param DOMXPath $xp      XPath bound to the parsed page.
 * @param string   $baseURL Base used to absolutize relative values.
 * @return string Absolute image URL, or 'fallback_image.jpg' when none is usable.
 */
function getThumbnail(DOMXPath $xp, $baseURL) {
    $queries = [
        '//meta[@property="og:image:secure_url"]/@content',
        '//meta[@property="og:image:url"]/@content',
        '//meta[@property="og:image"]/@content',
        '//meta[@name="twitter:image:src"]/@content',
        '//meta[@name="twitter:image"]/@content',
        '//link[@rel="apple-touch-icon"]/@href',
        '//img[contains(@class,"post-thumbnail")]/@src',
        '//figure/img/@src',
        '//img[contains(@class,"wp-post-image")]/@src',
        '//video/@poster',
        '//img/@src'
    ];
    foreach ($queries as $q) {
        $n = $xp->query($q);
        if ($n->length > 0) {
            $u = trim($n->item(0)->nodeValue);
            // An empty attribute would otherwise resolve to the base URL itself.
            if ($u === '') continue;

            if (strpos($u, '//') === 0) {
                // Protocol-relative: keep its host, borrow only the scheme.
                $scheme = parse_url((string) $baseURL, PHP_URL_SCHEME);
                $u = ($scheme ? $scheme : 'https') . ':' . $u;
            } elseif (parse_url($u, PHP_URL_SCHEME) === null) {
                $u = rtrim($baseURL, '/') . '/' . ltrim($u, '/');
            }
            if (filter_var($u, FILTER_VALIDATE_URL)) return $u;
        }
    }
    return 'fallback_image.jpg';
}

/**
 * Finds the favicon URL declared by a page.
 *
 * Looks at <link rel="icon"> and then <link rel="shortcut icon">. Empty values
 * are skipped; a protocol-relative value ("//host/path") inherits the scheme of
 * $baseURL (https when the base has none); any other value without a scheme is
 * appended to $baseURL. The first result that validates as a URL is returned.
 *
 * @param DOMXPath $xp      XPath bound to the parsed page.
 * @param string   $baseURL Base used to absolutize relative values.
 * @return string Absolute favicon URL, or 'default_favicon.ico' when none is usable.
 */
function getFavicon(DOMXPath $xp, $baseURL) {
    $queries = ['//link[@rel="icon"]/@href', '//link[@rel="shortcut icon"]/@href'];
    foreach ($queries as $q) {
        $n = $xp->query($q);
        if ($n->length > 0) {
            $v = trim($n->item(0)->nodeValue);
            // An empty attribute would otherwise resolve to the base URL itself.
            if ($v === '') continue;

            if (strpos($v, '//') === 0) {
                // Protocol-relative: keep its host, borrow only the scheme.
                $scheme = parse_url((string) $baseURL, PHP_URL_SCHEME);
                $v = ($scheme ? $scheme : 'https') . ':' . $v;
            } elseif (parse_url($v, PHP_URL_SCHEME) === null) {
                $v = rtrim($baseURL, '/') . '/' . ltrim($v, '/');
            }
            if (filter_var($v, FILTER_VALIDATE_URL)) return $v;
        }
    }
    return 'default_favicon.ico';
}

/**
 * Returns the canonical URL declared by the page, when it is a valid absolute URL.
 *
 * @param DOMXPath $xp          XPath bound to the parsed page.
 * @param string   $originalUrl URL the page was fetched from.
 * @return string The trimmed <link rel="canonical"> href when it validates,
 *                otherwise $originalUrl.
 */
function getCanonicalUrl(DOMXPath $xp, $originalUrl) {
    $canonicalNodes = $xp->query('//link[@rel="canonical"]/@href');
    if ($canonicalNodes->length === 0) {
        return $originalUrl;
    }

    $declared = trim($canonicalNodes->item(0)->nodeValue);

    return filter_var($declared, FILTER_VALIDATE_URL) ? $declared : $originalUrl;
}

/**********************************************
 * 3.2) Extract embed code para vídeo
 **********************************************/
/**
 * Builds an <iframe> embed snippet for a video URL.
 *
 * YouTube, Vimeo and TikTok URLs are rewritten to their embed endpoints using
 * the ID captured from the URL (the capture groups only allow [a-zA-Z0-9_-] or
 * digits). Any other URL is embedded as-is, HTML-escaped because it comes from
 * crawled content and is placed inside a single-quoted attribute.
 *
 * @param string $url Video or page URL.
 * @return string HTML iframe markup.
 */
function extractEmbedCode($url) {
    $url = (string) $url;
    // parse_url() yields null/false for URLs without a host; normalize to ''.
    $domain = strtolower((string) parse_url($url, PHP_URL_HOST));

    if (strpos($domain, 'youtube.com') !== false || strpos($domain, 'youtu.be') !== false) {
        preg_match('/(?:v=|\/embed\/|youtu\.be\/)([a-zA-Z0-9_-]+)/', $url, $m);
        if (!empty($m[1])) {
            return "<iframe src='https://www.youtube.com/embed/{$m[1]}' width='560' height='315' frameborder='0' allowfullscreen></iframe>";
        }
    }

    if (strpos($domain, 'vimeo.com') !== false) {
        preg_match('/vimeo\.com\/(\d+)/', $url, $m);
        if (!empty($m[1])) {
            return "<iframe src='https://player.vimeo.com/video/{$m[1]}' width='560' height='315' frameborder='0' allowfullscreen></iframe>";
        }
    }

    if (strpos($domain, 'tiktok.com') !== false) {
        preg_match('/\/video\/(\d+)/', $url, $m);
        if (!empty($m[1])) {
            return "<iframe src='https://www.tiktok.com/embed/{$m[1]}' width='325' height='575' frameborder='0' allowfullscreen></iframe>";
        }
    }

    // Escape quotes and markup so the URL cannot break out of the src attribute.
    $safeUrl = htmlspecialchars($url, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');

    return "<iframe src='$safeUrl' width='560' height='315' frameborder='0' allowfullscreen></iframe>";
}

/**********************************************
 * 4) cURL MULTI (BATCH) - Alta concorrência
 **********************************************/
// In-memory HTML cache used by fetchBatchWebInfo(); keys have the form "<url>|depth=<depth>".
$cacheHtml = [];

/**
 * Fetches a batch of URLs concurrently with cURL multi.
 *
 * Each item of $links is an array with 'url' and 'depth'. Results are keyed by
 * "<url>|depth=<depth>" and hold ['domHtml' => string, 'error' => string].
 * Non-empty bodies are stored in the global $cacheHtml and served from there on
 * later calls without a network request.
 *
 * Free slots are refilled in a loop after every finished transfer. A queued
 * item answered from the cache does not occupy a slot, so pulling just one item
 * would stall the queue and silently drop whatever remains.
 *
 * NOTE(review): TLS peer verification is disabled (CURLOPT_SSL_VERIFYPEER => false).
 *
 * @param array $links   Items to fetch: [['url' => ..., 'depth' => ...], ...].
 * @param int   $maxConc Maximum simultaneous transfers (values below 1 are treated as 1).
 * @param int   $timeout Per-transfer timeout in seconds.
 * @param bool  $randUA  Pick a random User-Agent per request instead of the first one.
 * @return array Map of "<url>|depth=<depth>" => ['domHtml' => ..., 'error' => ...].
 */
function fetchBatchWebInfo(array $links, int $maxConc, int $timeout, bool $randUA = false) {
    global $cacheHtml;
    if (!is_array($cacheHtml)) {
        $cacheHtml = [];
    }

    // A non-positive limit would never start a transfer and drop the whole batch.
    $maxConc = max(1, $maxConc);

    $mh = curl_multi_init();
    $results = [];
    $cached = [];
    $active = 0;
    $queue = $links;

    $userAgents = [
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
        "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
        "Mozilla/5.0 (iPhone; CPU iPhone OS 17_0 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.0 Mobile/15E148 Safari/604.1",
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:121.0) Gecko/20100101 Firefox/121.0",
    ];

    // Starts one transfer, or records a cache hit / start-up failure without using a slot.
    $addHandle = function($item) use ($mh, &$results, &$cached, &$active, $timeout, $randUA, $userAgents, &$cacheHtml) {
        $url = $item['url'];
        $depth = $item['depth'];
        $pk = "$url|depth=$depth";

        if (isset($cacheHtml[$pk])) {
            $cached[$pk] = ['domHtml' => $cacheHtml[$pk], 'error' => ''];
            return;
        }

        $ch = curl_init($url);
        if ($ch === false) {
            $results[$pk] = ['domHtml' => '', 'error' => 'curl_init failed'];
            return;
        }
        curl_setopt_array($ch, [
            CURLOPT_RETURNTRANSFER => true,
            CURLOPT_FOLLOWLOCATION => true,
            CURLOPT_ENCODING => 'gzip,deflate',
            CURLOPT_CONNECTTIMEOUT => 5,
            CURLOPT_TIMEOUT => $timeout,
            CURLOPT_SSL_VERIFYPEER => false,
            CURLOPT_USERAGENT => $randUA ? $userAgents[array_rand($userAgents)] : $userAgents[0],
            CURLOPT_HTTPHEADER => [
                'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8',
                'Accept-Language: en-US,en;q=0.5',
                'Connection: keep-alive'
            ],
            CURLOPT_PRIVATE => $pk
        ]);

        // A handle that cannot be attached would never complete; report it instead of counting it.
        if (curl_multi_add_handle($mh, $ch) !== 0) {
            $results[$pk] = ['domHtml' => '', 'error' => 'curl_multi_add_handle failed'];
            curl_close($ch);
            return;
        }
        $active++;
    };

    // Keeps pulling queued items until every slot is busy or the queue is empty.
    $fillSlots = function() use (&$queue, &$active, $maxConc, $addHandle) {
        while ($active < $maxConc && !empty($queue)) {
            $addHandle(array_shift($queue));
        }
    };

    // Start the initial transfers
    $fillSlots();

    // Drive the transfers
    do {
        curl_multi_exec($mh, $running);
        // select() may return -1 immediately; sleep briefly so the loop does not spin.
        if ($running && curl_multi_select($mh, 1.0) === -1) {
            usleep(1000);
        }

        while ($info = curl_multi_info_read($mh)) {
            if ($info['msg'] !== CURLMSG_DONE) {
                continue;
            }

            $ch = $info['handle'];
            $pk = curl_getinfo($ch, CURLINFO_PRIVATE);

            if (curl_errno($ch) === 0) {
                $html = curl_multi_getcontent($ch);
                $results[$pk] = ['domHtml' => $html, 'error' => ''];
                if ($html !== '') $cacheHtml[$pk] = $html;
            } else {
                $results[$pk] = ['domHtml' => '', 'error' => curl_error($ch)];
            }

            curl_multi_remove_handle($mh, $ch);
            curl_close($ch);
            $active--;

            $fillSlots();
        }
    } while ($running || $active > 0);

    curl_multi_close($mh);

    // Append the entries that were answered from the cache
    foreach ($cached as $pk => $hit) {
        $results[$pk] = $hit;
    }

    return $results;
}

/**********************************************
 * 5) FUNÇÕES DE BUSCA - Múltiplos motores
 **********************************************/
/**
 * Performs one blocking GET request and returns the response body.
 *
 * Redirects are followed, TLS peer verification is disabled and a fixed desktop
 * Chrome User-Agent is sent.
 *
 * @param string $url     URL to fetch.
 * @param int    $timeout Total timeout in seconds.
 * @return string|false Response body, or false when curl_exec() fails.
 */
function simpleGetContents($url, $timeout = 10) {
    $handle = curl_init($url);

    curl_setopt($handle, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($handle, CURLOPT_FOLLOWLOCATION, true);
    curl_setopt($handle, CURLOPT_TIMEOUT, $timeout);
    curl_setopt($handle, CURLOPT_SSL_VERIFYPEER, false);
    curl_setopt($handle, CURLOPT_USERAGENT, 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 Chrome/120.0.0.0 Safari/537.36');

    $body = curl_exec($handle);
    curl_close($handle);

    return $body;
}

/**
 * Extracts crawlable links from an HTML document.
 *
 * For every <a href>:
 *  1. pseudo-links (mailto:, javascript:, tel:, "#...") and blocked file types are
 *     rejected on the raw href, before any rewriting can hide their prefix;
 *  2. the href is cleaned with cleanUrl();
 *  3. an href without a scheme is resolved against $baseUrl: "//host/..." takes
 *     the base scheme, "/path" the base origin, "?query" the base path, anything
 *     else the directory of the base path. An href that already carries a scheme
 *     is left untouched;
 *  4. the URL is validated with isValidUrl() and the normalized value it returns
 *     is the one stored;
 *  5. blocked file types and search-engine URLs are rejected on the final URL.
 *
 * @param string $html    HTML source.
 * @param string $baseUrl URL the HTML was fetched from (optional).
 * @return array Unique list of absolute URLs.
 */
function extractLinksFromHtml($html, $baseUrl = '') {
    $links = [];
    if (!$html) return $links;

    $dom = new DOMDocument();
    // Collect parser warnings internally instead of emitting them; restore the previous mode afterwards.
    $previous = libxml_use_internal_errors(true);
    $dom->loadHTML($html);
    libxml_clear_errors();
    libxml_use_internal_errors($previous);

    // Pre-compute the pieces of the base URL needed to resolve relative hrefs.
    $scheme = 'https';
    $origin = '';
    $basePath = '/';
    $baseDir = '/';
    $p = $baseUrl ? parse_url($baseUrl) : false;
    if (is_array($p) && !empty($p['host'])) {
        $scheme = $p['scheme'] ?? 'https';
        $origin = $scheme . '://' . $p['host'] . (isset($p['port']) ? ':' . $p['port'] : '');
        if (!empty($p['path'])) {
            $basePath = $p['path'];
            $lastSlash = strrpos($basePath, '/');
            $baseDir = $lastSlash === false ? '/' : substr($basePath, 0, $lastSlash + 1);
        }
    }

    foreach ($dom->getElementsByTagName('a') as $a) {
        $href = trim($a->getAttribute('href'));
        if ($href === '' || shouldIgnoreLinkImmediately($href)) {
            continue;
        }

        $href = cleanUrl($href);
        if ($href === '') {
            continue;
        }

        $hasScheme = preg_match('#^[a-z][a-z0-9+.\-]*:#i', $href) === 1;
        if (!$hasScheme && $origin !== '') {
            if (strpos($href, '//') === 0) {
                $href = $scheme . ':' . $href;
            } elseif ($href[0] === '/') {
                $href = $origin . $href;
            } elseif ($href[0] === '?') {
                $href = $origin . $basePath . $href;
            } else {
                $href = $origin . $baseDir . $href;
            }
        }

        // isValidUrl() may add a scheme; keep what it returns, not the raw href.
        $valid = isValidUrl($href);
        if ($valid === false) {
            continue;
        }
        if (shouldIgnoreLinkImmediately($valid) || isSearchEngineUrl($valid)) {
            continue;
        }
        $links[] = $valid;
    }
    return array_values(array_unique($links));
}

/**
 * Collects result links from Bing for a query.
 *
 * Pages are requested with "first" = 1, 11, 21, ... Collection stops at the
 * first page that fails to download or yields no links.
 *
 * @param string $query    Search query.
 * @param int    $maxPages Maximum number of result pages to request.
 * @return array Unique links (keys preserved by array_unique).
 */
function getBingSearchLinks($query, $maxPages = 5) {
    $collected = [];
    $page = 0;

    while ($page < $maxPages) {
        $firstResult = ($page * 10) + 1;
        $searchUrl = "https://www.bing.com/search?q=" . urlencode($query) . "&first=" . $firstResult;

        $body = simpleGetContents($searchUrl);
        if (!$body) {
            break;
        }

        $pageLinks = extractLinksFromHtml($body, $searchUrl);
        if (empty($pageLinks)) {
            break;
        }

        $collected = array_merge($collected, $pageLinks);
        $page++;
    }

    return array_unique($collected);
}

/**
 * Collects result links from the DuckDuckGo HTML endpoint for a query.
 *
 * Only a single page is requested; $maxPages is accepted but not used.
 *
 * @param string $query    Search query.
 * @param int    $maxPages Unused.
 * @return array Unique links, empty when the download fails.
 */
function getDuckDuckGoLinks($query, $maxPages = 3) {
    $endpoint = "https://html.duckduckgo.com/html/?q=" . urlencode($query);
    $body = simpleGetContents($endpoint);

    $found = $body ? extractLinksFromHtml($body, $endpoint) : [];

    return array_unique($found);
}

/**
 * Collects result links from Yandex for a query.
 *
 * Pages are requested with "p" = 0, 1, 2, ... Collection stops at the first
 * page that fails to download or yields no links.
 *
 * @param string $query    Search query.
 * @param int    $maxPages Maximum number of result pages to request.
 * @return array Unique links (keys preserved by array_unique).
 */
function getYandexSearchLinks($query, $maxPages = 5) {
    $collected = [];
    $pageNo = 0;

    while ($pageNo < $maxPages) {
        $searchUrl = "https://yandex.com/search/?text=" . urlencode($query) . "&p=$pageNo";

        $body = simpleGetContents($searchUrl);
        if (!$body) {
            break;
        }

        $pageLinks = extractLinksFromHtml($body, $searchUrl);
        if (empty($pageLinks)) {
            break;
        }

        $collected = array_merge($collected, $pageLinks);
        $pageNo++;
    }

    return array_unique($collected);
}

/**
 * Collects result links from Baidu for a query.
 *
 * Pages are requested with "pn" = 0, 10, 20, ... Collection stops at the first
 * page that fails to download or yields no links.
 *
 * @param string $query    Search query.
 * @param int    $maxPages Maximum number of result pages to request.
 * @return array Unique links (keys preserved by array_unique).
 */
function getBaiduSearchLinks($query, $maxPages = 5) {
    $collected = [];
    $page = 0;

    while ($page < $maxPages) {
        $skip = $page * 10;
        $searchUrl = "https://www.baidu.com/s?wd=" . urlencode($query) . "&pn=" . $skip;

        $body = simpleGetContents($searchUrl);
        if (!$body) {
            break;
        }

        $pageLinks = extractLinksFromHtml($body, $searchUrl);
        if (empty($pageLinks)) {
            break;
        }

        $collected = array_merge($collected, $pageLinks);
        $page++;
    }

    return array_unique($collected);
}

/**
 * Collects result links from Yahoo! Japan for a query.
 *
 * Pages are requested with "b" = 1, 11, 21, ... Collection stops at the first
 * page that fails to download or yields no links.
 *
 * @param string $query    Search query.
 * @param int    $maxPages Maximum number of result pages to request.
 * @return array Unique links (keys preserved by array_unique).
 */
function getYahooJapanLinks($query, $maxPages = 5) {
    $collected = [];
    $page = 0;

    while ($page < $maxPages) {
        $begin = 1 + ($page * 10);
        $searchUrl = "https://search.yahoo.co.jp/search?p=" . urlencode($query) . "&b=$begin";

        $body = simpleGetContents($searchUrl);
        if (!$body) {
            break;
        }

        $pageLinks = extractLinksFromHtml($body, $searchUrl);
        if (empty($pageLinks)) {
            break;
        }

        $collected = array_merge($collected, $pageLinks);
        $page++;
    }

    return array_unique($collected);
}

/**
 * Runs every search term through every selected engine and appends the links found.
 *
 * Recognized engine names: 'bing', 'duckduckgo', 'yandex', 'baidu', 'yahoojp'.
 * Unknown names contribute nothing. Each link is appended to $allLinks as
 * ['url' => <link>, 'depth' => 0].
 *
 * @param iterable $engines        Engine names to query.
 * @param array    $searchTermList Search terms.
 * @param int      $maxPages       Result pages to request per engine and term.
 * @param array    $allLinks       Output list, appended to in place.
 */
function fetchSearchEngineLinks($engines, array $searchTermList, int $maxPages, &$allLinks) {
    foreach ($searchTermList as $term) {
        foreach ($engines as $engine) {
            // Loose comparison mirrors switch/case matching semantics.
            if ($engine == 'bing') {
                $hits = getBingSearchLinks($term, $maxPages);
            } elseif ($engine == 'duckduckgo') {
                $hits = getDuckDuckGoLinks($term, $maxPages);
            } elseif ($engine == 'yandex') {
                $hits = getYandexSearchLinks($term, $maxPages);
            } elseif ($engine == 'baidu') {
                $hits = getBaiduSearchLinks($term, $maxPages);
            } elseif ($engine == 'yahoojp') {
                $hits = getYahooJapanLinks($term, $maxPages);
            } else {
                $hits = [];
            }

            foreach ($hits as $link) {
                $allLinks[] = ['url' => $link, 'depth' => 0];
            }
        }
    }
}

/**********************************************
 * 6) Helpers
 **********************************************/
/**
 * Records a failed fetch and, while phases remain, schedules a retry.
 *
 * For $phaseIndex below 3 the item is queued for the next phase, a warning is
 * logged and $ignored is incremented. Otherwise an error is logged and $errors
 * is incremented. $processed is incremented in both cases.
 *
 * @param string $url         URL that failed.
 * @param int    $phaseIndex  Zero-based index of the current phase.
 * @param mixed  $item        Queue item to re-schedule.
 * @param array  $phaseQueues Per-phase queues, modified in place.
 * @param array  $output      Log entries (['type' => ..., 'msg' => ...]), appended in place.
 * @param int    $processed   Processed counter.
 * @param int    $errors      Error counter.
 * @param int    $ignored     Ignored/deferred counter.
 */
function handleCurlFailure($url, $phaseIndex, $item, &$phaseQueues, &$output, &$processed, &$errors, &$ignored) {
    $canRetry = $phaseIndex < 3;

    if (!$canRetry) {
        $output[] = ['type' => 'error', 'msg' => "$url - Falha final"];
        $errors++;
        $processed++;
        return;
    }

    $nextPhase = $phaseIndex + 1;
    $output[] = ['type' => 'warning', 'msg' => "$url - Falha, reprocessar fase " . $nextPhase];
    $phaseQueues[$nextPhase][] = $item;
    $ignored++;
    $processed++;
}

/**
 * Reports whether a link was already seen by a process and, if not, records it.
 *
 * The seen-links table created by this file has no unique key on
 * (process_id, link), so a duplicate INSERT does not fail. The lookup is
 * therefore done explicitly before inserting. Should a unique key exist in a
 * given deployment, a concurrent duplicate insert still ends up in the catch
 * branch and is reported as seen.
 *
 * NOTE(review): the comparison uses the column's collation; with a
 * case-insensitive collation, URLs differing only by letter case count as the same link.
 *
 * @param PDO    $pdo       Open database connection.
 * @param string $processID Crawl process identifier.
 * @param string $url       Link to check.
 * @param int    $depth     Depth stored with the link on first sight.
 * @return bool True when the link was already recorded (or the check failed), false when it was new.
 */
function checkAndMarkSeenLink(PDO $pdo, $processID, $url, $depth) {
    try {
        $find = $pdo->prepare("SELECT 1 FROM " . TABLE_SEEN_LINKS . " WHERE process_id = ? AND link = ? LIMIT 1");
        $find->execute([$processID, $url]);
        if ($find->fetchColumn() !== false) {
            return true; // Already recorded
        }

        $st = $pdo->prepare("INSERT INTO " . TABLE_SEEN_LINKS . " (process_id, link, depth) VALUES (?, ?, ?)");
        $st->execute([$processID, $url, $depth]);
        return false;
    } catch (Exception $e) {
        // Best effort: on a database error the link is treated as seen, as before, but the cause is logged.
        error_log('checkAndMarkSeenLink: ' . $e->getMessage());
        return true;
    }
}

/**********************************************
 * 7) DIRETÓRIO DOS PROCESSOS
 **********************************************/
// Root directory holding one sub-directory per crawl process.
$baseTempDir = __DIR__ . '/../temp/crawler_pro/';
if (!is_dir($baseTempDir)) @mkdir($baseTempDir, 0777, true);

// Remove process directories older than one day.
// glob() can return false on error, hence the "?: []" guards.
foreach (glob($baseTempDir . '*') ?: [] as $f) {
    if (is_dir($f) && (time() - filemtime($f) > 86400)) {
        foreach (glob("$f/*") ?: [] as $oldFile) {
            if (is_file($oldFile)) @unlink($oldFile);
        }
        @rmdir($f);
    }
}

// Resolve the process id. The same character whitelist is applied to ids taken
// from the request and to freshly generated ones: uniqid(..., true) contains a
// "." that the whitelist strips, so an unsanitized generated id would map to a
// different directory once it comes back through the request.
$processID = '';
if (!empty($_REQUEST['process_id']) && is_string($_REQUEST['process_id'])) {
    $processID = (string) preg_replace('/[^a-zA-Z0-9_\-]/', '', $_REQUEST['process_id']);
}
if ($processID === '') {
    // Nothing usable was supplied; an empty id would make $procDir the base directory itself.
    $processID = (string) preg_replace('/[^a-zA-Z0-9_\-]/', '', uniqid('pro_', true));
}

// Per-process working files.
$procDir = $baseTempDir . $processID . '/';
$linksFilePath = $procDir . 'links.json';
$progressFilePath = $procDir . 'progress.json';
$timeFilePath = $procDir . 'time.json';
$logFilePath = $procDir . 'log.json';

/**********************************************
 * HEADER HTML UNIFICADO
 **********************************************/
/**
 * Outputs the shared page head: doctype, <head> with the dark-theme stylesheet,
 * and the sticky top bar with logo, navigation and status badge.
 *
 * The function leaves <body> open; renderFooter() emits the closing tags.
 * Navigation hrefs are built from the BASE_URL constant.
 *
 * @param string $currentPage Identifier of the current page; the nav link whose
 *                            identifier matches ('dashboard', 'processor',
 *                            'crawler', 'manager' or 'history') gets the
 *                            "active" CSS class.
 */
function renderHeader($currentPage = 'crawler') {
    $baseUrl = BASE_URL;
    ?>
    <!DOCTYPE html>
    <html lang="pt-BR">
    <head>
        <meta charset="UTF-8">
        <meta name="viewport" content="width=device-width, initial-scale=1.0">
        <title>Crawler Pro - Flowbot v7.0</title>
        <link href="https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700&family=JetBrains+Mono:wght@400;500&display=swap" rel="stylesheet">
        <style>
            :root {
                --bg-primary: #0a0a1a;
                --bg-secondary: #12122a;
                --bg-tertiary: #1a1a3a;
                --accent: #00ff88;
                --accent-dim: rgba(0,255,136,0.2);
                --text-primary: #ffffff;
                --text-secondary: #a0a0b0;
                --success: #22c55e;
                --warning: #f59e0b;
                --error: #ef4444;
                --info: #3b82f6;
            }

            * { box-sizing: border-box; margin: 0; padding: 0; }

            body {
                font-family: 'Inter', sans-serif;
                background: var(--bg-primary);
                color: var(--text-primary);
                min-height: 100vh;
            }

            .app-header {
                background: var(--bg-secondary);
                border-bottom: 1px solid rgba(255,255,255,0.1);
                padding: 15px 30px;
                display: flex;
                justify-content: space-between;
                align-items: center;
                position: sticky;
                top: 0;
                z-index: 100;
            }

            .header-left {
                display: flex;
                align-items: center;
                gap: 30px;
            }

            .app-logo {
                font-size: 24px;
                font-weight: 700;
                color: var(--accent);
                display: flex;
                align-items: center;
                gap: 10px;
            }

            .app-logo .version {
                font-size: 12px;
                background: var(--accent-dim);
                padding: 3px 8px;
                border-radius: 4px;
                color: var(--accent);
            }

            .main-nav {
                display: flex;
                gap: 5px;
            }

            .nav-link {
                color: var(--text-secondary);
                text-decoration: none;
                padding: 8px 16px;
                border-radius: 6px;
                font-weight: 500;
                font-size: 14px;
                transition: all 0.2s;
            }

            .nav-link:hover {
                color: var(--text-primary);
                background: rgba(255,255,255,0.05);
            }

            .nav-link.active {
                color: var(--accent);
                background: var(--accent-dim);
            }

            .status-badge {
                display: flex;
                align-items: center;
                gap: 6px;
                padding: 6px 12px;
                border-radius: 20px;
                font-size: 13px;
                font-weight: 500;
            }

            .status-badge.online {
                background: rgba(34,197,94,0.2);
                color: var(--success);
            }

            .status-badge.online::before {
                content: '';
                width: 8px;
                height: 8px;
                background: var(--success);
                border-radius: 50%;
                animation: pulse 2s infinite;
            }

            @keyframes pulse {
                0%, 100% { opacity: 1; }
                50% { opacity: 0.5; }
            }

            .main-content {
                padding: 30px;
                max-width: 1400px;
                margin: 0 auto;
            }

            @media (max-width: 768px) {
                .app-header {
                    flex-direction: column;
                    gap: 15px;
                    padding: 15px;
                }
                .header-left {
                    flex-direction: column;
                    gap: 15px;
                }
                .main-nav {
                    flex-wrap: wrap;
                    justify-content: center;
                }
                .main-content {
                    padding: 15px;
                }
            }
        </style>
    </head>
    <body>
        <header class="app-header">
            <div class="header-left">
                <h1 class="app-logo">
                    <span>🚀</span>
                    Flowb0t <span class="version">v7.0</span>
                </h1>
                <nav class="main-nav">
                    <a href="<?= $baseUrl ?>/public/" class="nav-link <?= $currentPage === 'dashboard' ? 'active' : '' ?>">Dashboard</a>
                    <a href="<?= $baseUrl ?>/public/new" class="nav-link <?= $currentPage === 'processor' ? 'active' : '' ?>">Link Processor</a>
                    <a href="<?= $baseUrl ?>/views/crawler-pro.php" class="nav-link <?= $currentPage === 'crawler' ? 'active' : '' ?>">Crawler Pro</a>
                    <a href="<?= $baseUrl ?>/views/crawler-pro.php?action=manager" class="nav-link <?= $currentPage === 'manager' ? 'active' : '' ?>">Gerenciador</a>
                    <a href="<?= $baseUrl ?>/public/history" class="nav-link <?= $currentPage === 'history' ? 'active' : '' ?>">History</a>
                </nav>
            </div>
            <div class="header-right">
                <span class="status-badge online">System Online</span>
            </div>
        </header>
    <?php
}

/**
 * Emits the end of the HTML document: an inline script followed by the
 * closing </body> and </html> tags.
 *
 * The script reloads the page after 2000 ms whenever the document contains
 * an element with the CSS class "auto-refresh". renderProgressPage() puts
 * that class on <main> while a process is neither paused nor finished, which
 * is what drives the crawler from one batch to the next.
 */
function renderFooter() {
    ?>
        <script>
            // Auto-refresh se estiver processando
            if (document.querySelector('.auto-refresh')) {
                setTimeout(() => location.reload(), 2000);
            }
        </script>
    </body>
    </html>
    <?php
}

/**********************************************
 * 8) AÇÕES DO MANAGER
 **********************************************/
if (isset($_GET['action'])) {
    $action = $_GET['action'];

    // MANAGER: lists every process directory found under $baseTempDir with
    // its import progress, its status and the pause/resume/view/edit/stop/
    // restart links, then ends the request.
    if ($action === 'manager') {
        renderHeader('manager');
        ?>
        <main class="main-content">
            <style>
                .manager-title { font-size: 28px; margin-bottom: 20px; display: flex; align-items: center; gap: 10px; }
                .manager-table { width: 100%; border-collapse: collapse; background: var(--bg-secondary); border-radius: 12px; overflow: hidden; }
                .manager-table th, .manager-table td { padding: 15px 20px; text-align: left; border-bottom: 1px solid rgba(255,255,255,0.05); }
                .manager-table th { background: var(--bg-tertiary); color: var(--accent); font-weight: 600; }
                .manager-table tr:hover { background: rgba(255,255,255,0.02); }
                .btn { display: inline-block; padding: 8px 14px; border-radius: 6px; text-decoration: none; font-weight: 500; font-size: 13px; margin: 2px; transition: all 0.2s; }
                .btn-primary { background: var(--info); color: white; }
                .btn-success { background: var(--success); color: white; }
                .btn-warning { background: var(--warning); color: black; }
                .btn-danger { background: var(--error); color: white; }
                .btn:hover { transform: translateY(-1px); filter: brightness(1.1); }
                .status-running { color: var(--success); }
                .status-paused { color: var(--warning); }
                .status-stopped { color: var(--error); }
                .empty-state { text-align: center; padding: 60px; color: var(--text-secondary); }
                .progress-mini { background: var(--bg-primary); border-radius: 10px; height: 8px; overflow: hidden; width: 100px; display: inline-block; margin-left: 10px; }
                .progress-mini-fill { height: 100%; background: linear-gradient(90deg, var(--accent), #00cc6a); }
            </style>

            <h1 class="manager-title">📋 Gerenciador de Processos</h1>

            <p style="margin-bottom: 20px;">
                <a href="?" class="btn btn-success">+ Novo Processo</a>
            </p>

            <?php
            // One sub-directory per process. glob() may return false on
            // error; empty() below treats that the same as "no processes".
            $dirs = glob($baseTempDir . '*', GLOB_ONLYDIR);
            if (empty($dirs)):
            ?>
                <div class="empty-state">
                    <p style="font-size: 48px; margin-bottom: 20px;">📭</p>
                    <p>Nenhum processo ativo no momento.</p>
                </div>
            <?php else: ?>
                <table class="manager-table">
                    <thead>
                        <tr>
                            <th>Processo</th>
                            <th>Progresso</th>
                            <th>Status</th>
                            <th>Ações</th>
                        </tr>
                    </thead>
                    <tbody>
                    <?php foreach ($dirs as $d):
                        // The directory name doubles as the process ID.
                        $pid = basename($d);
                        $pf = $d . "/progress.json";
                        // Directories without a progress file are not listed.
                        if (!file_exists($pf)) continue;

                        $dat = json_decode(file_get_contents($pf), true);
                        // Neither are files that are empty or fail to decode.
                        if (!$dat) continue;

                        $isPaused = !empty($dat['is_paused']);
                        $isStop = !empty($dat['stop_flag']);
                        $imported = $dat['imported_links'] ?? 0;
                        $total = $dat['total_links'] ?? 0;
                        // Imported links as a share of all known links; the
                        // guard avoids a division by zero when total is 0.
                        $percent = $total > 0 ? round(($imported / $total) * 100) : 0;
                    ?>
                        <tr>
                            <td>
                                <strong><?= htmlspecialchars(substr($pid, 0, 20)) ?></strong>
                                <br><small style="color: var(--text-secondary);"><?= $imported ?> importados</small>
                            </td>
                            <td>
                                <?= $percent ?>%
                                <div class="progress-mini">
                                    <div class="progress-mini-fill" style="width: <?= $percent ?>%;"></div>
                                </div>
                            </td>
                            <td>
                                <?php if ($isStop): ?>
                                    <span class="status-stopped">⏹ Parado</span>
                                <?php elseif ($isPaused): ?>
                                    <span class="status-paused">⏸ Pausado</span>
                                <?php else: ?>
                                    <span class="status-running">▶ Executando</span>
                                <?php endif; ?>
                            </td>
                            <td>
                                <?php if (!$isStop): ?>
                                    <?php if ($isPaused): ?>
                                        <a href="?action=resume&process_id=<?= urlencode($pid) ?>" class="btn btn-success">▶ Resume</a>
                                    <?php else: ?>
                                        <a href="?action=pause&process_id=<?= urlencode($pid) ?>" class="btn btn-warning">⏸ Pause</a>
                                    <?php endif; ?>
                                    <a href="?process_id=<?= urlencode($pid) ?>" class="btn btn-primary">👁 Ver</a>
                                    <a href="?action=edit&process_id=<?= urlencode($pid) ?>" class="btn btn-primary">✏️ Edit</a>
                                <?php endif; ?>
                                <a href="?action=stop&process_id=<?= urlencode($pid) ?>" class="btn btn-danger" onclick="return confirm('Tem certeza que deseja parar este processo?')">⏹ Stop</a>
                                <a href="?action=reiniciar&process_id=<?= urlencode($pid) ?>" class="btn btn-warning" onclick="return confirm('Reiniciar processo?')">🔄 Reiniciar</a>
                            </td>
                        </tr>
                    <?php endforeach; ?>
                    </tbody>
                </table>
            <?php endif; ?>
        </main>
        <?php
        renderFooter();
        exit;
    }

    // PAUSE: set is_paused in progress.json, then go back to the manager.
    // When the progress file does not exist the request falls through to the
    // sections below, exactly as before.
    if ($action === 'pause' && file_exists($progressFilePath)) {
        $dat = json_decode((string)file_get_contents($progressFilePath), true);
        // Rewrite the file only when it decoded to an array. Assigning a key
        // on a failed decode (null) would create a fresh array and replace
        // the whole progress state with a stub holding nothing but the flag.
        if (is_array($dat)) {
            $dat['is_paused'] = true;
            // LOCK_EX keeps concurrent writers from interleaving their output.
            file_put_contents($progressFilePath, json_encode($dat), LOCK_EX);
        }
        header("Location: ?action=manager");
        exit;
    }

    // RESUME: clear is_paused in progress.json, then go back to the manager.
    // When the progress file does not exist the request falls through to the
    // sections below, exactly as before.
    if ($action === 'resume' && file_exists($progressFilePath)) {
        $dat = json_decode((string)file_get_contents($progressFilePath), true);
        // Rewrite the file only when it decoded to an array. Assigning a key
        // on a failed decode (null) would create a fresh array and replace
        // the whole progress state with a stub holding nothing but the flag.
        if (is_array($dat)) {
            $dat['is_paused'] = false;
            // LOCK_EX keeps concurrent writers from interleaving their output.
            file_put_contents($progressFilePath, json_encode($dat), LOCK_EX);
        }
        header("Location: ?action=manager");
        exit;
    }

    // STOP: flag the process as stopped, delete its working files and
    // directory, then go back to the manager.
    if ($action === 'stop') {
        if (file_exists($progressFilePath)) {
            $dat = json_decode((string)file_get_contents($progressFilePath), true);
            // Only a decoded, non-empty state is worth flagging.
            if (is_array($dat) && $dat) {
                $dat['stop_flag'] = true;
                file_put_contents($progressFilePath, json_encode($dat), LOCK_EX);
            }
        }
        if (is_dir($procDir)) {
            // glob() returns false on error (and, on some systems, when
            // nothing matches); handing that to array_map() is a TypeError
            // on PHP 8, so fall back to an empty list.
            foreach (glob($procDir . '/*.*') ?: [] as $file) {
                if (is_file($file)) {
                    unlink($file);
                }
            }
            // Best effort: rmdir fails when something is left in the directory.
            @rmdir($procDir);
        }
        header("Location: ?action=manager");
        exit;
    }

    // REINICIAR (restart): delete the process's working files and directory,
    // then send the user to the start form.
    if ($action === 'reiniciar') {
        if (is_dir($procDir)) {
            // glob() returns false on error (and, on some systems, when
            // nothing matches); handing that to array_map() is a TypeError
            // on PHP 8, so fall back to an empty list.
            foreach (glob($procDir . '/*.*') ?: [] as $file) {
                if (is_file($file)) {
                    unlink($file);
                }
            }
            // Best effort: rmdir fails when something is left in the directory.
            @rmdir($procDir);
        }
        header("Location: ?");
        exit;
    }

    // EDIT: show a form for changing max_depth, max_posts and
    // relevance_threshold of an existing process. The form posts to
    // ?action=edit_save.
    if ($action === 'edit') {
        $pd = file_exists($progressFilePath)
            ? json_decode((string)file_get_contents($progressFilePath), true)
            : null;

        // A missing or undecodable progress file has nothing to edit. Showing
        // the form anyway would present defaults as if they were the stored
        // values, so return to the manager instead.
        if (!is_array($pd)) {
            header("Location: ?action=manager");
            exit;
        }

        // Force the stored values to integers before they are echoed into
        // the HTML attributes below.
        $editDepth = (int)($pd['max_depth'] ?? 1);
        $editPosts = (int)($pd['max_posts'] ?? 1000);
        $editRelevance = (int)($pd['relevance_threshold'] ?? 2);

        renderHeader('manager');
        ?>
        <main class="main-content">
            <style>
                .edit-form { background: var(--bg-secondary); padding: 30px; border-radius: 12px; max-width: 500px; }
                .edit-form h2 { margin-bottom: 20px; }
                .edit-form label { display: block; margin-bottom: 8px; color: var(--text-secondary); font-weight: 500; }
                .edit-form input { width: 100%; padding: 12px; border: 1px solid rgba(255,255,255,0.1); border-radius: 8px; background: var(--bg-primary); color: white; font-size: 16px; margin-bottom: 20px; }
                .edit-form button { padding: 12px 30px; background: var(--accent); color: black; border: none; border-radius: 8px; font-weight: 600; cursor: pointer; }
            </style>

            <div class="edit-form">
                <h2>✏️ Editar Processo</h2>
                <p style="color: var(--text-secondary); margin-bottom: 20px;"><?= htmlspecialchars($processID) ?></p>

                <form method="POST" action="?action=edit_save&process_id=<?= urlencode($processID) ?>">
                    <label>Profundidade máxima (max_depth):</label>
                    <input type="number" name="max_depth" value="<?= $editDepth ?>" min="1" max="10">

                    <label>Máximo de posts (max_posts):</label>
                    <input type="number" name="max_posts" value="<?= $editPosts ?>" min="1">

                    <label>Relevância mínima:</label>
                    <input type="number" name="relevance_threshold" value="<?= $editRelevance ?>" min="0">

                    <button type="submit">💾 Salvar Alterações</button>
                </form>

                <p style="margin-top: 20px;"><a href="?action=manager" style="color: var(--accent);">← Voltar ao Gerenciador</a></p>
            </div>
        </main>
        <?php
        renderFooter();
        exit;
    }

    // EDIT SAVE: store the limits posted by the edit form in progress.json,
    // then go back to the manager.
    if ($action === 'edit_save') {
        // Only a real form submission may change settings; a plain GET of
        // this URL would otherwise reset all three limits to their defaults.
        $isPost = ($_SERVER['REQUEST_METHOD'] ?? '') === 'POST';

        if ($isPost && file_exists($progressFilePath)) {
            $pd = json_decode((string)file_get_contents($progressFilePath), true);

            // Skip the write when the file did not decode: assigning keys on
            // null would create a three-key stub and wipe the process state.
            if (is_array($pd)) {
                // Same lower bounds the start form applies. A field missing
                // from the request keeps the stored value.
                $pd['max_depth'] = max(1, (int)($_POST['max_depth'] ?? $pd['max_depth'] ?? 1));
                $pd['max_posts'] = max(1, (int)($_POST['max_posts'] ?? $pd['max_posts'] ?? 1000));
                $pd['relevance_threshold'] = max(0, (int)($_POST['relevance_threshold'] ?? $pd['relevance_threshold'] ?? 2));
                file_put_contents($progressFilePath, json_encode($pd), LOCK_EX);
            }
        }
        header("Location: ?action=manager");
        exit;
    }
}

/**********************************************
 * 9) PROCESS THE INITIAL FORM
 *
 * Handles the POST of the start form: parses search terms and seed URLs,
 * resets the process directory and the seen-links rows for this process,
 * collects the initial links (seeds plus search-engine results), writes the
 * links, progress and time files, and redirects to the progress view.
 **********************************************/
if ($_SERVER['REQUEST_METHOD'] === 'POST' && (isset($_POST['search_terms']) || isset($_POST['site_list']) || isset($_POST['individual_links']))) {

    // Returns the named POST field as a list of trimmed, non-empty lines.
    // A field submitted as an array (name="field[]") yields an empty list
    // instead of reaching trim(), which would throw a TypeError on PHP 8.
    $readLines = static function ($field) {
        $raw = $_POST[$field] ?? '';
        if (!is_string($raw)) {
            return [];
        }
        $lines = [];
        foreach (preg_split('/\r?\n/', $raw) as $line) {
            $line = trim($line);
            // Compare with '' explicitly: a truthiness test would also drop
            // the literal line "0".
            if ($line !== '') {
                $lines[] = $line;
            }
        }
        return $lines;
    };

    // Keeps the lines that isValidUrl() accepts and that
    // shouldIgnoreLinkImmediately() does not reject. The value returned by
    // isValidUrl() is the one stored.
    $keepCrawlable = static function (array $lines) {
        $urls = [];
        foreach ($lines as $line) {
            $valid = isValidUrl($line);
            if ($valid && !shouldIgnoreLinkImmediately($valid)) {
                $urls[] = $valid;
            }
        }
        return $urls;
    };

    $commonTermRaw = $_POST['common_term'] ?? '';
    $commonTerm = is_string($commonTermRaw) ? trim($commonTermRaw) : '';

    // Numeric limits, each with a lower bound.
    $maxDepth = max(1, (int)($_POST['max_depth'] ?? 2));
    $maxPosts = max(1, (int)($_POST['max_posts'] ?? 100));
    $maxSearchPages = max(1, (int)($_POST['max_search_pages'] ?? 5));
    $maxLinksPerPage = max(1, (int)($_POST['max_links_per_page'] ?? 50));
    $minRelevance = max(0, (int)($_POST['min_relevance'] ?? 2));

    $infiniteCrawl = isset($_POST['infinite_crawl']);
    $limitToSameDomain = isset($_POST['limit_to_same_domain']);

    // Selected search engines
    $engines = [];
    if (!empty($_POST['engine_bing'])) $engines[] = 'bing';
    if (!empty($_POST['engine_ddg'])) $engines[] = 'duckduckgo';
    if (!empty($_POST['engine_yandex'])) $engines[] = 'yandex';
    if (!empty($_POST['engine_baidu'])) $engines[] = 'baidu';
    if (!empty($_POST['engine_yahoojp'])) $engines[] = 'yahoojp';

    // Search terms, each followed by the complementary term when one is set
    $searchTermList = [];
    foreach ($readLines('search_terms') as $term) {
        $searchTermList[] = $commonTerm !== '' ? $term . ' ' . $commonTerm : $term;
    }

    // Seed sites and individual links
    $siteList = $keepCrawlable($readLines('site_list'));
    $indivList = $keepCrawlable($readLines('individual_links'));

    // Prepare the process directory: empty it when it exists, create it
    // otherwise.
    if (is_dir($procDir)) {
        // glob() returns false on error; array_map('unlink', false) is a
        // TypeError on PHP 8, so fall back to an empty list.
        foreach (glob($procDir . '/*.*') ?: [] as $stale) {
            if (is_file($stale)) {
                unlink($stale);
            }
        }
    } elseif (!@mkdir($procDir, 0777, true) && !is_dir($procDir)) {
        // Without the directory none of the files below can be written, and
        // the redirect would just land on the empty form again.
        die("Falha ao criar diretório do processo");
    }

    // Forget the links seen by any previous run of this process
    $pdo->prepare("DELETE FROM " . TABLE_SEEN_LINKS . " WHERE process_id = ?")->execute([$processID]);

    // Collect the initial links: seeds first, all at depth 0
    $allLinks = [];
    foreach ($siteList as $s) {
        $allLinks[] = ['url' => $s, 'depth' => 0];
    }
    foreach ($indivList as $u) {
        $allLinks[] = ['url' => $u, 'depth' => 0];
    }

    // Search-engine results; $allLinks is handed over to be filled
    if (!empty($engines) && !empty($searchTermList)) {
        fetchSearchEngineLinks($engines, $searchTermList, $maxSearchPages, $allLinks);
    }

    // Drop duplicate URLs, keeping the first occurrence
    $unique = [];
    foreach ($allLinks as $item) {
        $key = $item['url'];
        if (!isset($unique[$key])) {
            $unique[$key] = $item;
        }
    }
    $allLinks = array_values($unique);

    // Save the link list
    file_put_contents($linksFilePath, json_encode($allLinks, JSON_PRETTY_PRINT), LOCK_EX);

    // Four phase queues; every link starts in the first one
    $phaseQueues = [[], [], [], []];
    $phaseQueues[0] = $allLinks;

    $data = [
        'total_links' => count($allLinks),
        'processed_links' => 0,
        'ignored_links' => 0,
        'error_links' => 0,
        'imported_links' => 0,
        'lotes_processados' => 0,
        'max_lote_time' => 0,
        'last_batch_time' => 0,
        'elapsed_time' => 0,
        'remaining_time' => 0,
        'processing_rate' => 0,
        'phaseIndex' => 0,
        'phaseQueues' => $phaseQueues,
        'max_depth' => $maxDepth,
        'max_posts' => $maxPosts,
        'search_terms' => $searchTermList,
        'infinite_crawl' => $infiniteCrawl,
        'limit_to_same_domain' => $limitToSameDomain,
        'max_links_per_page' => $maxLinksPerPage,
        'relevance_threshold' => $minRelevance,
        'is_paused' => false,
        'stop_flag' => false,
        'logs' => []
    ];

    file_put_contents($progressFilePath, json_encode($data), LOCK_EX);
    file_put_contents($timeFilePath, json_encode(['start_time' => microtime(true)]), LOCK_EX);

    header("Location: ?process_id=" . urlencode($processID));
    exit;
}

/**********************************************
 * 10) SHOW THE INITIAL FORM
 *
 * Rendered whenever the current process has no progress file yet. The form
 * posts back to this script with the process ID in the query string.
 **********************************************/
if (!file_exists($progressFilePath)) {
    renderHeader('crawler');
    ?>
    <main class="main-content">
        <style>
            .form-container {
                background: var(--bg-secondary);
                border-radius: 16px;
                padding: 40px;
                max-width: 800px;
                margin: 0 auto;
            }
            .form-title {
                font-size: 28px;
                margin-bottom: 10px;
                display: flex;
                align-items: center;
                gap: 12px;
            }
            .form-subtitle {
                color: var(--text-secondary);
                margin-bottom: 30px;
            }
            .form-section {
                margin-bottom: 25px;
            }
            .form-section-title {
                font-size: 16px;
                font-weight: 600;
                margin-bottom: 15px;
                color: var(--accent);
                display: flex;
                align-items: center;
                gap: 8px;
            }
            .form-group {
                margin-bottom: 20px;
            }
            .form-label {
                display: block;
                margin-bottom: 8px;
                font-weight: 500;
                color: var(--text-secondary);
            }
            textarea, input[type="text"], input[type="number"] {
                width: 100%;
                padding: 12px 15px;
                border: 1px solid rgba(255,255,255,0.1);
                border-radius: 10px;
                background: var(--bg-primary);
                color: var(--text-primary);
                font-family: 'Inter', sans-serif;
                font-size: 14px;
                transition: border-color 0.2s;
            }
            textarea:focus, input:focus {
                outline: none;
                border-color: var(--accent);
            }
            textarea { resize: vertical; min-height: 100px; }
            .row { display: grid; grid-template-columns: repeat(auto-fit, minmax(150px, 1fr)); gap: 15px; }
            .checkbox-group {
                display: flex;
                flex-wrap: wrap;
                gap: 10px;
            }
            .checkbox-item {
                display: flex;
                align-items: center;
                gap: 8px;
                padding: 10px 15px;
                background: var(--bg-primary);
                border: 1px solid rgba(255,255,255,0.1);
                border-radius: 8px;
                cursor: pointer;
                transition: all 0.2s;
            }
            .checkbox-item:hover {
                border-color: var(--accent);
            }
            .checkbox-item input:checked + span {
                color: var(--accent);
            }
            .btn-submit {
                display: block;
                width: 100%;
                padding: 16px;
                background: linear-gradient(135deg, var(--accent), #00cc6a);
                color: black;
                border: none;
                border-radius: 10px;
                font-size: 16px;
                font-weight: 600;
                cursor: pointer;
                transition: all 0.2s;
            }
            .btn-submit:hover {
                transform: translateY(-2px);
                box-shadow: 0 5px 20px rgba(0,255,136,0.3);
            }
        </style>

        <div class="form-container">
            <h1 class="form-title">🚀 Crawler Profissional Unificado</h1>
            <p class="form-subtitle">Busca inteligente com múltiplos motores e deep crawling avançado</p>

            <?php
            // The ID is a query-string value, so it is URL-encoded like every
            // other process_id link in this file. HTML-escaping alone leaves
            // characters such as & # + and spaces free to break the query.
            ?>
            <form method="POST" action="?process_id=<?= urlencode($processID) ?>">

                <div class="form-section">
                    <h3 class="form-section-title">🔍 Termos de Busca</h3>

                    <div class="form-group">
                        <label class="form-label">Termos de busca (um por linha)</label>
                        <textarea name="search_terms" rows="4" placeholder="web scraping tutorial&#10;python crawler guide&#10;data extraction"></textarea>
                    </div>

                    <div class="form-group">
                        <label class="form-label">Termo complementar (adicionado a todos os termos)</label>
                        <input type="text" name="common_term" placeholder="Ex: 2024 news">
                    </div>
                </div>

                <div class="form-section">
                    <h3 class="form-section-title">🌐 URLs Seed</h3>

                    <div class="form-group">
                        <label class="form-label">Lista de sites (um por linha)</label>
                        <textarea name="site_list" rows="3" placeholder="https://example.com&#10;https://blog.example.org"></textarea>
                    </div>

                    <div class="form-group">
                        <label class="form-label">Links individuais (um por linha)</label>
                        <textarea name="individual_links" rows="3" placeholder="https://example.com/page1&#10;https://example.com/page2"></textarea>
                    </div>
                </div>

                <div class="form-section">
                    <h3 class="form-section-title">🔎 Motores de Busca</h3>
                    <div class="checkbox-group">
                        <label class="checkbox-item">
                            <input type="checkbox" name="engine_bing" value="1" checked>
                            <span>🔵 Bing</span>
                        </label>
                        <label class="checkbox-item">
                            <input type="checkbox" name="engine_ddg" value="1" checked>
                            <span>🦆 DuckDuckGo</span>
                        </label>
                        <label class="checkbox-item">
                            <input type="checkbox" name="engine_yandex" value="1">
                            <span>🔴 Yandex</span>
                        </label>
                        <label class="checkbox-item">
                            <input type="checkbox" name="engine_baidu" value="1">
                            <span>🔵 Baidu</span>
                        </label>
                        <label class="checkbox-item">
                            <input type="checkbox" name="engine_yahoojp" value="1">
                            <span>🟣 Yahoo JP</span>
                        </label>
                    </div>
                </div>

                <div class="form-section">
                    <h3 class="form-section-title">⚙️ Configurações</h3>

                    <div class="row">
                        <div class="form-group">
                            <label class="form-label">Profundidade máxima</label>
                            <input type="number" name="max_depth" value="2" min="1" max="10">
                        </div>
                        <div class="form-group">
                            <label class="form-label">Máx. posts a importar</label>
                            <input type="number" name="max_posts" value="100" min="1" max="100000">
                        </div>
                        <div class="form-group">
                            <label class="form-label">Páginas por buscador</label>
                            <input type="number" name="max_search_pages" value="5" min="1" max="20">
                        </div>
                        <div class="form-group">
                            <label class="form-label">Links por página (deep)</label>
                            <input type="number" name="max_links_per_page" value="50" min="1" max="500">
                        </div>
                        <div class="form-group">
                            <label class="form-label">Relevância mínima</label>
                            <input type="number" name="min_relevance" value="2" min="0" max="100">
                        </div>
                    </div>

                    <div class="checkbox-group" style="margin-top: 15px;">
                        <label class="checkbox-item">
                            <input type="checkbox" name="infinite_crawl" value="1">
                            <span>♾️ Modo Infinito</span>
                        </label>
                        <label class="checkbox-item">
                            <input type="checkbox" name="limit_to_same_domain" value="1">
                            <span>🔒 Limitar ao mesmo domínio</span>
                        </label>
                    </div>
                </div>

                <button type="submit" class="btn-submit">▶️ Iniciar Crawler</button>
            </form>
        </div>
    </main>
    <?php
    renderFooter();
    exit;
}

/**********************************************
 * 11) CRAWLER PROCESSING
 *
 * Runs once per page load: reads progress.json, processes one batch of
 * queued links, writes the updated state back and renders the progress
 * page. The footer script reloads that page while the process is running,
 * which triggers the next batch.
 **********************************************/
$data = json_decode((string)file_get_contents($progressFilePath), true);
if (!$data || !is_array($data)) die("Erro ao ler progress.json");

// Supply any key a damaged progress.json lacks so the counters and queues
// used below always exist. Keys already present are left untouched.
$data += [
    'total_links' => 0,
    'processed_links' => 0,
    'ignored_links' => 0,
    'error_links' => 0,
    'imported_links' => 0,
    'lotes_processados' => 0,
    'max_lote_time' => 0,
    'phaseQueues' => [[], [], [], []],
    'logs' => [],
];

// A stop request removes the process's files and returns to the manager.
if (!empty($data['stop_flag'])) {
    // glob() returns false on error; array_map('unlink', false) is a
    // TypeError on PHP 8, so fall back to an empty list.
    foreach (glob($procDir . '/*.*') ?: [] as $leftover) {
        if (is_file($leftover)) {
            unlink($leftover);
        }
    }
    @rmdir($procDir);
    header("Location: ?action=manager");
    exit;
}

// Per-phase fetch settings: each later phase uses fewer parallel
// connections and a longer timeout.
$PHASES = [
    ['concurrency' => 100, 'timeout' => 5],
    ['concurrency' => 50, 'timeout' => 10],
    ['concurrency' => 25, 'timeout' => 15],
    ['concurrency' => 10, 'timeout' => 20],
];
$lastPhase = count($PHASES) - 1;

// Load the process settings
$maxDepth = $data['max_depth'] ?? 1;
$maxPosts = $data['max_posts'] ?? 1000;
// NOTE(review): $infiniteMode is loaded but nothing in this section reads
// it — confirm whether infinite mode is meant to lift the limits below.
$infiniteMode = $data['infinite_crawl'] ?? false;
$limitToSameDomain = $data['limit_to_same_domain'] ?? false;
$maxLinksPerPage = $data['max_links_per_page'] ?? 50;
$minRelevance = $data['relevance_threshold'] ?? 2;
$searchTerms = $data['search_terms'] ?? [];
$phaseQueues = is_array($data['phaseQueues']) ? $data['phaseQueues'] : [[], [], [], []];
$data['phaseQueues'] = $phaseQueues;
$phaseIndex = $data['phaseIndex'] ?? 0;
$isPaused = !empty($data['is_paused']);

// Elapsed time since the start recorded in the time file
$timeData = file_exists($timeFilePath) ? json_decode((string)file_get_contents($timeFilePath), true) : [];
$startTime = $timeData['start_time'] ?? microtime(true);
$elapsed = microtime(true) - $startTime;

// Links still waiting in any queue
$remCount = 0;
foreach ($phaseQueues as $q) $remCount += count($q);

// Remaining-time estimate: average time per processed link so far,
// multiplied by the number of queued links.
$processedSoFar = max($data['processed_links'], 1);
$avgTimePerLink = $elapsed / $processedSoFar;
$remainingTime = $avgTimePerLink * $remCount;

$data['elapsed_time'] = $elapsed;
$data['remaining_time'] = $remainingTime;
$data['processing_rate'] = round($data['processed_links'] / max($elapsed, 1), 2);

// Paused or finished: show the progress page without processing anything
if ($isPaused || $remCount === 0 || $data['imported_links'] >= $maxPosts) {
    renderProgressPage($data, $processID, $isPaused, $remCount === 0 || $data['imported_links'] >= $maxPosts);
    exit;
}

// Index of the first queue that still holds links, or $fallback when all
// queues are empty.
$firstPendingPhase = static function (array $queues, $fallback) {
    foreach ($queues as $idx => $queue) {
        if (!empty($queue)) {
            return (int)$idx;
        }
    }
    return $fallback;
};

// Turns an href found on $pageUrl into an absolute http(s) URL, or returns
// null when the href is not something to crawl (empty, fragment-only,
// another scheme such as mailto:, or the page URL has no host).
// Dot segments ("../") are left in place.
$resolveHref = static function ($href, $pageUrl) {
    $href = trim($href);
    if ($href === '' || $href[0] === '#') {
        return null;
    }
    if (preg_match('#^https?://#i', $href)) {
        return $href;
    }
    if (preg_match('#^[a-z][a-z0-9+.\-]*:#i', $href)) {
        return null;
    }
    $parts = parse_url($pageUrl);
    if (empty($parts['host'])) {
        return null;
    }
    $scheme = $parts['scheme'] ?? 'https';
    // Protocol-relative link: only the scheme is inherited from the page.
    if (strpos($href, '//') === 0) {
        return $scheme . ':' . $href;
    }
    // Keep a non-default port, which is part of the origin.
    $origin = $scheme . '://' . $parts['host'] . (isset($parts['port']) ? ':' . $parts['port'] : '');
    if ($href[0] === '/') {
        return $origin . $href;
    }
    $path = $parts['path'] ?? '/';
    // Query-only link: same path as the page, different query.
    if ($href[0] === '?') {
        return $origin . $path . $href;
    }
    // Path-relative link: resolved against the directory of the page.
    $dir = substr($path, 0, (int)strrpos($path, '/') + 1);
    return $origin . ($dir !== '' ? $dir : '/') . $href;
};

// Process a batch from the first queue that has links. Deep crawling always
// appends to queue 0, even after the process has moved on to a later phase;
// taking links only from the stored phase would leave those links queued
// forever and the page reloading without ever finishing.
$phaseIndex = $firstPendingPhase($phaseQueues, $phaseIndex);
if (!isset($PHASES[$phaseIndex])) $phaseIndex = $lastPhase;
if (!isset($phaseQueues[$phaseIndex])) $phaseQueues[$phaseIndex] = [];
$currentConc = $PHASES[$phaseIndex]['concurrency'];
$currentTimeout = $PHASES[$phaseIndex]['timeout'];

$BATCH_SIZE = 10;
$linkBatch = array_splice($phaseQueues[$phaseIndex], 0, $BATCH_SIZE);
$batchStartTime = microtime(true);

$logs = $data['logs'] ?? [];

// Process each link of the batch
foreach ($linkBatch as $item) {
    $url = $item['url'] ?? null;
    $depth = $item['depth'] ?? 0;

    // An entry without a usable URL cannot be fetched.
    if (!is_string($url) || $url === '') {
        $data['ignored_links']++;
        $data['processed_links']++;
        continue;
    }

    // Skip links this process has already seen. They count as processed;
    // otherwise processed_links could never catch up with total_links and
    // the progress bar would stop short of 100%.
    if (checkAndMarkSeenLink($pdo, $processID, $url, $depth)) {
        $data['ignored_links']++;
        $data['processed_links']++;
        $logs[] = ['type' => 'warning', 'msg' => "Duplicado: " . mb_substr($url, 0, 60)];
        continue;
    }

    // Fetch the page.
    // NOTE(review): the fetch is issued for one URL at a time, so the
    // phase's concurrency setting has little to work with — confirm whether
    // fetchBatchWebInfo() could take the whole batch in one call.
    $results = fetchBatchWebInfo([['url' => $url, 'depth' => $depth]], $currentConc, $currentTimeout, true);
    $pk = "$url|depth=$depth";

    if (empty($results[$pk]['domHtml'])) {
        handleCurlFailure($url, $phaseIndex, $item, $phaseQueues, $logs, $data['processed_links'], $data['error_links'], $data['ignored_links']);
        continue;
    }

    $html = $results[$pk]['domHtml'];
    $dom = new DOMDocument();
    // The XML prolog makes libxml read the markup as UTF-8; @ silences the
    // warnings real-world HTML triggers in the parser.
    @$dom->loadHTML('<?xml encoding="UTF-8">' . $html);
    $xp = new DOMXPath($dom);

    // Relevance check
    if (!isRelevant($dom, $url, $searchTerms, $minRelevance, $GLOBALS['FORCED_DOMAINS'])) {
        $data['ignored_links']++;
        $logs[] = ['type' => 'warning', 'msg' => "Baixa relevância: " . mb_substr($url, 0, 50)];
        $data['processed_links']++;
        continue;
    }

    // Extract metadata
    $title = getTitle($xp);
    $description = getDescription($xp);
    $thumbnail = getThumbnail($xp, $url);
    $favicon = getFavicon($xp, $url);
    $canonicalUrl = getCanonicalUrl($xp, $url);
    $tags = extractTagsFromTitle($title);
    $author = generateRandomAuthor();
    $userId = getOrCreateUserId($pdo, $author);

    // Insert into the database unless the canonical URL is already stored
    try {
        $check = $pdo->prepare("SELECT id FROM pinfeeds WHERE link = ? LIMIT 1");
        $check->execute([$canonicalUrl]);

        if (!$check->fetch()) {
            $stmt = $pdo->prepare("
                INSERT INTO pinfeeds (title, description, link, thumbnail, source_favicon, tags, author, user_id, created_at, status)
                VALUES (?, ?, ?, ?, ?, ?, ?, ?, NOW(), 'active')
            ");
            $stmt->execute([
                $title,
                $description,
                $canonicalUrl,
                $thumbnail,
                $favicon,
                implode(',', array_slice($tags, 0, 5)),
                $author,
                $userId
            ]);

            $data['imported_links']++;
            // mb_substr cuts on character boundaries. A byte-wise cut
            // through a multi-byte character leaves invalid UTF-8, which
            // makes json_encode() fail when the state is saved.
            $logs[] = ['type' => 'success', 'msg' => "Importado: " . mb_substr((string)$title, 0, 50)];
        } else {
            $data['ignored_links']++;
            $logs[] = ['type' => 'warning', 'msg' => "Já existe: " . mb_substr($url, 0, 50)];
        }
    } catch (Exception $e) {
        $data['error_links']++;
        $logs[] = ['type' => 'error', 'msg' => "Erro DB: " . mb_substr($e->getMessage(), 0, 50)];
    }

    $data['processed_links']++;

    // Deep crawl: queue the links found on this page
    if ($depth < $maxDepth && $data['imported_links'] < $maxPosts) {
        $newLinks = [];
        foreach ($dom->getElementsByTagName('a') as $a) {
            $href = $a->getAttribute('href');
            if (!$href || shouldIgnoreLinkImmediately($href) || isSearchEngineUrl($href)) continue;

            // Make the link absolute
            $href = $resolveHref($href, $url);
            if ($href === null) continue;

            $href = cleanUrl($href);
            if (!isValidUrl($href)) continue;

            // Optionally keep only links on the same host as the page
            if ($limitToSameDomain) {
                $baseDomain = parse_url($url, PHP_URL_HOST);
                $linkDomain = parse_url($href, PHP_URL_HOST);
                if ($baseDomain !== $linkDomain) continue;
            }

            $newLinks[] = $href;
        }

        $added = 0;
        foreach (array_slice(array_unique($newLinks), 0, $maxLinksPerPage) as $newUrl) {
            $phaseQueues[0][] = ['url' => $newUrl, 'depth' => $depth + 1];
            $data['total_links']++;
            $added++;
        }

        if ($added > 0) {
            $logs[] = ['type' => 'info', 'msg' => "Encontrados $added novos links"];
        }
    }

    // Stop the batch once the import target is reached
    if ($data['imported_links'] >= $maxPosts) {
        $logs[] = ['type' => 'success', 'msg' => "Meta de $maxPosts posts atingida!"];
        break;
    }
}

// Update batch timings
$batchTime = microtime(true) - $batchStartTime;
$data['last_batch_time'] = $batchTime;
$data['max_lote_time'] = max($data['max_lote_time'], $batchTime);
$data['lotes_processados']++;

// Keep the last 100 log entries
$data['logs'] = array_slice($logs, -100);
$data['phaseQueues'] = $phaseQueues;

// The next batch starts at the first queue that still has links
$data['phaseIndex'] = min($firstPendingPhase($phaseQueues, $phaseIndex), $lastPhase);

// The manager may have paused, stopped, restarted or edited this process
// while the batch was running. Writing back the state read at the top of the
// request would silently undo that, so pick the control values up from disk.
clearstatcache(true, $progressFilePath);
if (!file_exists($progressFilePath)) {
    // Stop and restart delete the process files; do not bring them back.
    header("Location: ?action=manager");
    exit;
}
$onDisk = json_decode((string)file_get_contents($progressFilePath), true);
if (is_array($onDisk)) {
    foreach (['is_paused', 'stop_flag', 'max_depth', 'max_posts', 'relevance_threshold'] as $controlKey) {
        if (array_key_exists($controlKey, $onDisk)) {
            $data[$controlKey] = $onDisk[$controlKey];
        }
    }
}

// Save. JSON_PARTIAL_OUTPUT_ON_ERROR nulls a value that cannot be encoded
// instead of failing the whole call; a false result is never written, since
// that would truncate progress.json.
$encoded = json_encode($data, JSON_PARTIAL_OUTPUT_ON_ERROR);
if ($encoded !== false) {
    file_put_contents($progressFilePath, $encoded, LOCK_EX);
}

// Render; the page reloads itself unless a pause arrived during the batch
renderProgressPage($data, $processID, !empty($data['is_paused']), false);

/**********************************************
 * FUNÇÃO DE RENDERIZAÇÃO DO PROGRESSO
 **********************************************/
/**
 * Renders the full progress page (header, status, actions, stats, log, footer)
 * for one crawler process.
 *
 * @param array  $data       Progress state; the keys read here are total_links,
 *                           processed_links, imported_links, ignored_links,
 *                           error_links, phaseQueues, elapsed_time,
 *                           remaining_time, processing_rate, max_posts,
 *                           max_depth, lotes_processados and logs.
 *                           Missing keys fall back to 0 / empty.
 * @param string $processID  Process identifier (first 15 characters are shown).
 * @param bool   $isPaused   True when the process is paused.
 * @param bool   $isFinished True when the process has finished.
 */
function renderProgressPage($data, $processID, $isPaused, $isFinished) {
    // HTML-escape any scalar for text or attribute context. ENT_SUBSTITUTE keeps
    // the rest of the text when it contains invalid UTF-8 instead of returning ''.
    $esc = static function ($value) {
        return htmlspecialchars((string)$value, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
    };

    // Format seconds as HH:MM:SS without wrapping at 24h (gmdate("H:i:s") wraps,
    // which misreports long-running crawls). Negative values are shown as zero.
    $formatDuration = static function ($seconds) {
        $seconds = max(0, (int)$seconds);
        return sprintf(
            '%02d:%02d:%02d',
            intdiv($seconds, 3600),
            intdiv($seconds % 3600, 60),
            $seconds % 60
        );
    };

    $total = (int)($data['total_links'] ?? 0);
    $processed = (int)($data['processed_links'] ?? 0);
    $imported = (int)($data['imported_links'] ?? 0);
    $ignored = (int)($data['ignored_links'] ?? 0);
    $errors = (int)($data['error_links'] ?? 0);

    // Number of links still waiting in any phase queue
    $remCount = 0;
    foreach (($data['phaseQueues'] ?? []) as $q) {
        if (is_array($q)) {
            $remCount += count($q);
        }
    }

    // Percentages are clamped to 0..100; the divisor is floored at 1 to avoid
    // division by zero, while the displayed total stays the real count.
    $percentProcessed = (int)max(0, min(100, round(($processed / max(1, $total)) * 100)));
    $maxPostsDivisor = max(1, (int)($data['max_posts'] ?? 0));
    $percentOfGoal = (int)max(0, min(100, round(($imported / $maxPostsDivisor) * 100)));

    $elapsed = $data['elapsed_time'] ?? 0;
    $remaining = $data['remaining_time'] ?? 0;
    $rate = $data['processing_rate'] ?? 0;
    $mem = round(memory_get_usage(true) / 1024 / 1024, 1);

    $elapsedStr = $formatDuration($elapsed);
    $remainingStr = $formatDuration($remaining);

    $shortProcessID = mb_substr((string)$processID, 0, 15);

    renderHeader('crawler');
    ?>
    <main class="main-content <?= (!$isPaused && !$isFinished) ? 'auto-refresh' : '' ?>">
        <style>
            .progress-container {
                background: var(--bg-secondary);
                border-radius: 16px;
                padding: 30px;
                margin-bottom: 20px;
            }
            .progress-header {
                display: flex;
                justify-content: space-between;
                align-items: center;
                margin-bottom: 20px;
            }
            .progress-title {
                font-size: 24px;
                display: flex;
                align-items: center;
                gap: 10px;
            }
            .status-badge-lg {
                padding: 8px 16px;
                border-radius: 20px;
                font-weight: 600;
            }
            .status-running { background: rgba(34,197,94,0.2); color: var(--success); }
            .status-paused { background: rgba(245,158,11,0.2); color: var(--warning); }
            .status-finished { background: rgba(59,130,246,0.2); color: var(--info); }

            .progress-bars {
                margin-bottom: 30px;
            }
            .progress-bar-container {
                margin-bottom: 15px;
            }
            .progress-bar-label {
                display: flex;
                justify-content: space-between;
                margin-bottom: 5px;
                font-size: 14px;
            }
            .progress-bar {
                height: 24px;
                background: var(--bg-primary);
                border-radius: 12px;
                overflow: hidden;
            }
            .progress-bar-fill {
                height: 100%;
                border-radius: 12px;
                display: flex;
                align-items: center;
                justify-content: center;
                color: white;
                font-size: 12px;
                font-weight: 600;
                transition: width 0.5s;
            }
            .fill-green { background: linear-gradient(90deg, #22c55e, #16a34a); }
            .fill-blue { background: linear-gradient(90deg, #3b82f6, #2563eb); }
            .fill-purple { background: linear-gradient(90deg, #8b5cf6, #7c3aed); }

            .stats-grid {
                display: grid;
                grid-template-columns: repeat(auto-fit, minmax(140px, 1fr));
                gap: 15px;
                margin-bottom: 30px;
            }
            .stat-card {
                background: var(--bg-primary);
                padding: 20px;
                border-radius: 12px;
                text-align: center;
            }
            .stat-value {
                font-size: 32px;
                font-weight: 700;
                color: var(--accent);
            }
            .stat-value.success { color: var(--success); }
            .stat-value.warning { color: var(--warning); }
            .stat-value.error { color: var(--error); }
            .stat-value.info { color: var(--info); }
            .stat-label {
                font-size: 12px;
                color: var(--text-secondary);
                margin-top: 5px;
            }

            .actions-bar {
                display: flex;
                gap: 10px;
                margin-bottom: 30px;
                flex-wrap: wrap;
            }
            .btn { padding: 10px 20px; border-radius: 8px; text-decoration: none; font-weight: 500; transition: all 0.2s; display: inline-flex; align-items: center; gap: 8px; }
            .btn-success { background: var(--success); color: white; }
            .btn-warning { background: var(--warning); color: black; }
            .btn-danger { background: var(--error); color: white; }
            .btn-primary { background: var(--info); color: white; }
            .btn:hover { transform: translateY(-2px); }

            .terminal {
                background: #0a0a15;
                border-radius: 12px;
                padding: 20px;
                font-family: 'JetBrains Mono', monospace;
                font-size: 13px;
                max-height: 350px;
                overflow-y: auto;
            }
            .log-line {
                padding: 6px 0;
                border-bottom: 1px solid rgba(255,255,255,0.03);
            }
            .log-success { color: var(--success); }
            .log-warning { color: var(--warning); }
            .log-error { color: var(--error); }
            .log-info { color: var(--info); }
            .cursor-blink {
                animation: blink 1s steps(1) infinite;
            }
            @keyframes blink { 50% { opacity: 0; } }
        </style>

        <div class="progress-container">
            <div class="progress-header">
                <h1 class="progress-title">
                    <?= $isFinished ? '✅' : ($isPaused ? '⏸️' : '🔄') ?>
                    Processo <?= $esc($shortProcessID) ?>
                </h1>
                <span class="status-badge-lg <?= $isFinished ? 'status-finished' : ($isPaused ? 'status-paused' : 'status-running') ?>">
                    <?= $isFinished ? 'Concluído' : ($isPaused ? 'Pausado' : 'Executando') ?>
                </span>
            </div>

            <div class="actions-bar">
                <?php if (!$isFinished): ?>
                    <?php if ($isPaused): ?>
                        <a href="?action=resume&process_id=<?= urlencode($processID) ?>" class="btn btn-success">▶ Continuar</a>
                    <?php else: ?>
                        <a href="?action=pause&process_id=<?= urlencode($processID) ?>" class="btn btn-warning">⏸ Pausar</a>
                    <?php endif; ?>
                <?php endif; ?>
                <a href="?action=stop&process_id=<?= urlencode($processID) ?>" class="btn btn-danger">⏹ Parar</a>
                <a href="?action=manager" class="btn btn-primary">📋 Gerenciador</a>
                <a href="?" class="btn btn-primary">+ Novo</a>
            </div>

            <div class="progress-bars">
                <div class="progress-bar-container">
                    <div class="progress-bar-label">
                        <span>Processamento Total</span>
                        <span><?= $percentProcessed ?>%</span>
                    </div>
                    <div class="progress-bar">
                        <div class="progress-bar-fill fill-purple" style="width: <?= $percentProcessed ?>%"><?= $percentProcessed ?>%</div>
                    </div>
                </div>
                <div class="progress-bar-container">
                    <div class="progress-bar-label">
                        <span>Importados</span>
                        <span><?= $imported ?> / <?= $esc($data['max_posts'] ?? 0) ?></span>
                    </div>
                    <div class="progress-bar">
                        <div class="progress-bar-fill fill-green" style="width: <?= $percentOfGoal ?>%"></div>
                    </div>
                </div>
            </div>

            <div class="stats-grid">
                <div class="stat-card">
                    <div class="stat-value"><?= number_format($total) ?></div>
                    <div class="stat-label">Total Links</div>
                </div>
                <div class="stat-card">
                    <div class="stat-value"><?= number_format($processed) ?></div>
                    <div class="stat-label">Processados</div>
                </div>
                <div class="stat-card">
                    <div class="stat-value success"><?= number_format($imported) ?></div>
                    <div class="stat-label">Importados</div>
                </div>
                <div class="stat-card">
                    <div class="stat-value warning"><?= number_format($ignored) ?></div>
                    <div class="stat-label">Ignorados</div>
                </div>
                <div class="stat-card">
                    <div class="stat-value error"><?= number_format($errors) ?></div>
                    <div class="stat-label">Erros</div>
                </div>
                <div class="stat-card">
                    <div class="stat-value info"><?= number_format($remCount) ?></div>
                    <div class="stat-label">Na Fila</div>
                </div>
                <div class="stat-card">
                    <div class="stat-value"><?= $esc($rate) ?></div>
                    <div class="stat-label">Links/seg</div>
                </div>
                <div class="stat-card">
                    <div class="stat-value"><?= $elapsedStr ?></div>
                    <div class="stat-label">Tempo</div>
                </div>
                <div class="stat-card">
                    <div class="stat-value"><?= $remainingStr ?></div>
                    <div class="stat-label">Restante</div>
                </div>
                <div class="stat-card">
                    <div class="stat-value"><?= $mem ?></div>
                    <div class="stat-label">MB RAM</div>
                </div>
                <div class="stat-card">
                    <div class="stat-value"><?= $esc($data['max_depth'] ?? 0) ?></div>
                    <div class="stat-label">Profundidade</div>
                </div>
                <div class="stat-card">
                    <div class="stat-value"><?= $esc($data['lotes_processados'] ?? 0) ?></div>
                    <div class="stat-label">Batches</div>
                </div>
            </div>

            <h3 style="margin-bottom: 15px; display: flex; align-items: center; gap: 10px;">📜 Log em Tempo Real</h3>
            <div class="terminal">
                <?php
                // Newest entries first; only the 50 most recent are shown
                $logs = array_reverse($data['logs'] ?? []);
                foreach (array_slice($logs, 0, 50) as $log):
                    // The type ends up inside a class attribute, so restrict it to
                    // characters that are safe in a CSS class name.
                    $logType = (string)preg_replace('/[^A-Za-z0-9_-]/', '', (string)($log['type'] ?? 'info'));
                    $class = 'log-' . ($logType !== '' ? $logType : 'info');
                ?>
                    <div class="log-line <?= $class ?>"><?= $esc($log['msg'] ?? '') ?></div>
                <?php endforeach; ?>
                <?php if (empty($logs)): ?>
                    <div class="log-line log-info">Aguardando processamento...</div>
                <?php endif; ?>
                <div class="cursor-blink" style="color: var(--accent);">█</div>
            </div>
        </div>
    </main>
    <?php
    renderFooter();
}
