
-- Create the NEXUS tables (compatible version)

-- nexus_jobs: one row per job, holding its configuration, its current
-- status and its running counters.
-- String literals are single-quoted so the statement also parses when the
-- ANSI_QUOTES SQL mode is enabled (double quotes would then be read as
-- identifiers, not strings).
CREATE TABLE IF NOT EXISTS nexus_jobs (
    id BIGINT UNSIGNED AUTO_INCREMENT PRIMARY KEY,
    job_uuid CHAR(36) NOT NULL UNIQUE,
    -- Nullable: a job may exist without an associated user.
    user_id BIGINT UNSIGNED DEFAULT NULL,
    name VARCHAR(255) NOT NULL,
    description TEXT,
    job_type ENUM('search','domain','sitemap','direct','mixed') NOT NULL DEFAULT 'search',
    -- Job inputs. NOTE(review): the serialization format of these LONGTEXT
    -- columns is not defined by this schema - confirm against the writer.
    search_terms LONGTEXT,
    target_domains LONGTEXT,
    direct_urls LONGTEXT,
    -- Per-job limits and request behaviour.
    max_depth INT DEFAULT 3,
    max_pages INT DEFAULT 10000,
    max_concurrent INT DEFAULT 100,
    request_timeout INT DEFAULT 10,
    delay_between FLOAT DEFAULT 0.5,
    respect_robots TINYINT(1) DEFAULT 1,
    follow_redirects TINYINT(1) DEFAULT 1,
    -- Search configuration.
    search_provider ENUM('bing','google','duckduckgo','all') DEFAULT 'bing',
    search_pages INT DEFAULT 10,
    search_types TEXT,
    -- Filtering configuration.
    relevance_threshold INT DEFAULT 2,
    min_content_length INT DEFAULT 100,
    allowed_domains LONGTEXT,
    blocked_domains LONGTEXT,
    allowed_languages TEXT,
    content_types TEXT,
    -- Import configuration.
    auto_import TINYINT(1) DEFAULT 1,
    import_to_pinfeeds TINYINT(1) DEFAULT 1,
    create_users TINYINT(1) DEFAULT 1,
    dedupe_by ENUM('url','title','content_hash','all') DEFAULT 'url',
    -- Lifecycle state and progress counters; all counters start at 0.
    status ENUM('pending','queued','running','paused','completed','failed','cancelled') DEFAULT 'pending',
    progress_percent DECIMAL(5,2) DEFAULT 0.00,
    current_phase INT DEFAULT 0,
    total_seeds INT DEFAULT 0,
    total_queued INT DEFAULT 0,
    total_processed INT DEFAULT 0,
    total_successful INT DEFAULT 0,
    total_failed INT DEFAULT 0,
    total_skipped INT DEFAULT 0,
    total_imported INT DEFAULT 0,
    total_duplicates INT DEFAULT 0,
    -- Lifecycle timestamps; NULL until set by the application.
    started_at DATETIME DEFAULT NULL,
    paused_at DATETIME DEFAULT NULL,
    completed_at DATETIME DEFAULT NULL,
    estimated_completion DATETIME DEFAULT NULL,
    priority INT DEFAULT 5,
    created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
    -- No DEFAULT clause: the column is NULL on insert and is only filled in
    -- automatically once the row is updated.
    updated_at DATETIME ON UPDATE CURRENT_TIMESTAMP,
    INDEX idx_status (status),
    INDEX idx_user_id (user_id),
    INDEX idx_created_at (created_at)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;

-- nexus_queue: one row per URL queued for a job.
-- String literals are single-quoted so the statement also parses when the
-- ANSI_QUOTES SQL mode is enabled (double quotes would then be read as
-- identifiers, not strings).
CREATE TABLE IF NOT EXISTS nexus_queue (
    id BIGINT UNSIGNED AUTO_INCREMENT PRIMARY KEY,
    -- NOTE(review): presumably references nexus_jobs.id; no foreign key is
    -- declared, so the database does not enforce it.
    job_id BIGINT UNSIGNED NOT NULL,
    url_hash CHAR(64) NOT NULL,
    url VARCHAR(2048) NOT NULL,
    domain VARCHAR(255),
    source_type ENUM('seed','search','discovered') DEFAULT 'seed',
    search_term VARCHAR(500) DEFAULT NULL,
    parent_url_hash CHAR(64) DEFAULT NULL,
    depth INT DEFAULT 0,
    priority INT DEFAULT 5,
    status ENUM('pending','processing','completed','failed','skipped') DEFAULT 'pending',
    worker_id VARCHAR(100) DEFAULT NULL,
    retry_count INT DEFAULT 0,
    last_error TEXT,
    -- Defaults to the insertion time.
    scheduled_for DATETIME DEFAULT CURRENT_TIMESTAMP,
    started_at DATETIME DEFAULT NULL,
    completed_at DATETIME DEFAULT NULL,
    -- A given url_hash can be queued at most once per job.
    UNIQUE KEY unique_job_url (job_id, url_hash),
    INDEX idx_job_status (job_id, status),
    INDEX idx_domain (domain)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;

-- nexus_results: one row per fetched URL of a job, with response metadata,
-- extracted content fields and import state.
-- String literals are single-quoted so the statement also parses when the
-- ANSI_QUOTES SQL mode is enabled (double quotes would then be read as
-- identifiers, not strings).
CREATE TABLE IF NOT EXISTS nexus_results (
    id BIGINT UNSIGNED AUTO_INCREMENT PRIMARY KEY,
    -- NOTE(review): job_id / queue_id presumably reference nexus_jobs.id and
    -- nexus_queue.id; no foreign keys are declared, so nothing enforces it.
    job_id BIGINT UNSIGNED NOT NULL,
    queue_id BIGINT UNSIGNED NOT NULL,
    url_hash CHAR(64) NOT NULL,
    url VARCHAR(2048) NOT NULL,
    -- HTTP response metadata.
    http_status INT,
    content_type VARCHAR(100),
    content_length INT,
    response_time_ms INT,
    final_url VARCHAR(2048),
    -- Extracted page fields.
    title VARCHAR(500),
    description TEXT,
    content LONGTEXT,
    content_hash CHAR(64),
    thumbnail VARCHAR(2048),
    favicon VARCHAR(500),
    canonical_url VARCHAR(2048),
    author VARCHAR(255),
    publish_date DATETIME,
    language VARCHAR(10),
    word_count INT,
    reading_time INT,
    relevance_score DECIMAL(5,2),
    matched_terms TEXT,
    -- Link counters; default to 0.
    internal_links INT DEFAULT 0,
    external_links INT DEFAULT 0,
    image_links INT DEFAULT 0,
    video_links INT DEFAULT 0,
    -- Import tracking.
    import_status ENUM('pending','imported','skipped','failed') DEFAULT 'pending',
    imported_to TEXT,
    pinfeeds_id BIGINT UNSIGNED DEFAULT NULL,
    -- Defaults to the insertion time.
    crawled_at DATETIME DEFAULT CURRENT_TIMESTAMP,
    imported_at DATETIME DEFAULT NULL,
    -- At most one result per (job, url_hash).
    UNIQUE KEY unique_job_url (job_id, url_hash),
    -- NOTE(review): job_id is already the leftmost column of unique_job_url,
    -- so this index is a prefix duplicate; kept so the index name stays
    -- available to anything that references it.
    INDEX idx_job_id (job_id),
    INDEX idx_import_status (import_status)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;

-- nexus_logs: log entries, optionally tied to a job.
-- String literals are single-quoted so the statement also parses when the
-- ANSI_QUOTES SQL mode is enabled (double quotes would then be read as
-- identifiers, not strings).
CREATE TABLE IF NOT EXISTS nexus_logs (
    id BIGINT UNSIGNED AUTO_INCREMENT PRIMARY KEY,
    -- Nullable: an entry does not have to belong to a job.
    job_id BIGINT UNSIGNED DEFAULT NULL,
    log_level ENUM('DEBUG','INFO','WARNING','ERROR','CRITICAL') NOT NULL,
    category VARCHAR(50) NOT NULL,
    message TEXT NOT NULL,
    -- NOTE(review): serialization format of context is not defined here.
    context TEXT,
    created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
    INDEX idx_job_id (job_id),
    INDEX idx_category (category)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;

-- nexus_settings: key/value settings, one row per unique setting_key.
-- String literals are single-quoted so the statement also parses when the
-- ANSI_QUOTES SQL mode is enabled (double quotes would then be read as
-- identifiers, not strings).
CREATE TABLE IF NOT EXISTS nexus_settings (
    id INT UNSIGNED AUTO_INCREMENT PRIMARY KEY,
    setting_key VARCHAR(100) NOT NULL UNIQUE,
    -- The value is stored as text; setting_type names how it should be read.
    setting_value LONGTEXT,
    setting_type ENUM('string','int','float','bool','json') DEFAULT 'string',
    description VARCHAR(500),
    -- No DEFAULT clause: the column is NULL on insert and is only filled in
    -- automatically once the row is updated.
    updated_at DATETIME ON UPDATE CURRENT_TIMESTAMP
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;

-- Seed default settings. INSERT IGNORE skips any row whose setting_key
-- already exists (unique key), so re-running the script leaves existing
-- values untouched.
-- Literals are single-quoted so the statement also parses when the
-- ANSI_QUOTES SQL mode is enabled.
INSERT IGNORE INTO nexus_settings (setting_key, setting_value, setting_type, description) VALUES
('max_concurrent_jobs', '10', 'int', 'Maximum concurrent running jobs'),
('max_concurrent_workers', '500', 'int', 'Maximum concurrent crawler workers'),
('default_request_timeout', '10', 'int', 'Default HTTP request timeout in seconds');
