<?php
/**
 * FLOWB0T NEXUS - Bing Search Provider
 * Handles Bing search API integration for web, images, and videos
 *
 * @package Flowb0t\Engine\SearchProviders
 * @version 1.0.0
 */

namespace Flowb0t\Engine\SearchProviders;

/**
 * Scrapes Bing result pages (web, images, videos) and extracts result URLs.
 *
 * Pages are fetched with cURL using a browser-like header set and parsed with
 * regular expressions; no official Bing API is involved.
 *
 * Recognised configuration keys:
 *  - market      (string) Sent as `mkt` by buildSearchUrl(). Default 'en-US'.
 *  - freshness   (string) Sent as `freshness` by buildSearchUrl(). Default ''.
 *  - safe_search (string) Stored but not read anywhere in this class.
 *  - verify_ssl  (bool)   Verify the TLS certificate and host name when
 *                         fetching. Default true; pass false only for
 *                         debugging behind an intercepting proxy.
 *
 * NOTE(review): search() builds its request URLs without `mkt`/`freshness`;
 * only buildSearchUrl() applies them. Confirm whether searches should honour
 * those settings as well.
 */
class BingProvider {
    /** @var array<string, mixed> Effective configuration (defaults merged with caller overrides). */
    private array $config;

    /** @var string[] Pool of desktop browser User-Agent strings; one is picked per request. */
    private array $userAgents = [
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:121.0) Gecko/20100101 Firefox/121.0',
    ];

    /** Number of results requested per page and the maximum returned by each parser. */
    public const RESULTS_PER_PAGE = 10;

    /** Second-level domains whose hosts are treated as Bing-internal and skipped in web results. */
    private const INTERNAL_DOMAINS = ['bing.com', 'microsoft.com', 'msn.com'];

    /**
     * Constructor
     *
     * @param array<string, mixed> $config Overrides for the defaults listed on the class.
     */
    public function __construct(array $config = []) {
        $this->config = array_merge([
            'market' => 'en-US',
            'safe_search' => 'Moderate',
            'freshness' => '', // Day, Week, Month, or empty
            'verify_ssl' => true,
        ], $config);
    }

    /**
     * Search Bing for a term
     *
     * @param string $term Search term
     * @param string $type Type: web, images, videos (anything else is treated as web)
     * @param int $page Page number (0-indexed; negative values are treated as 0)
     * @return string[] Unique result URLs, at most RESULTS_PER_PAGE; empty when the fetch fails
     */
    public function search(string $term, string $type = 'web', int $page = 0): array {
        switch ($type) {
            case 'images':
                return $this->searchImages($term, $page);
            case 'videos':
                return $this->searchVideos($term, $page);
            default:
                return $this->searchWeb($term, $page);
        }
    }

    /**
     * Search web pages
     *
     * @return string[]
     */
    private function searchWeb(string $term, int $page): array {
        $html = $this->fetchPage($this->requestUrl('/search', $term, $page));

        return $html === '' ? [] : $this->parseWebResults($html);
    }

    /**
     * Search images
     *
     * @return string[]
     */
    private function searchImages(string $term, int $page): array {
        $html = $this->fetchPage($this->requestUrl('/images/search', $term, $page));

        return $html === '' ? [] : $this->parseImageResults($html);
    }

    /**
     * Search videos
     *
     * @return string[]
     */
    private function searchVideos(string $term, int $page): array {
        $html = $this->fetchPage($this->requestUrl('/videos/search', $term, $page));

        return $html === '' ? [] : $this->parseVideoResults($html);
    }

    /**
     * Build the URL actually requested by the search* methods.
     *
     * @param string $path Bing endpoint path, e.g. '/search'
     */
    private function requestUrl(string $path, string $term, int $page): string {
        return 'https://www.bing.com' . $path
            . '?q=' . urlencode($term)
            . '&first=' . $this->pageOffset($page)
            . '&count=' . self::RESULTS_PER_PAGE;
    }

    /**
     * Convert a 0-indexed page number into the `first` result offset.
     *
     * Negative pages are clamped to 0 so the offset is never negative.
     *
     * NOTE(review): Bing's `first` parameter appears to be 1-based; this keeps
     * the existing 0-based offset. Confirm before changing.
     */
    private function pageOffset(int $page): int {
        return max(0, $page) * self::RESULTS_PER_PAGE;
    }

    /**
     * Fetch a page using cURL
     *
     * @return string Response body, or '' when the handle cannot be created,
     *                the transfer fails, or the final HTTP status is not 200
     */
    private function fetchPage(string $url): string {
        $ch = curl_init();
        if ($ch === false) {
            return '';
        }

        $verifyTls = (bool) ($this->config['verify_ssl'] ?? true);
        $cookieFile = sys_get_temp_dir() . '/bing_cookies.txt';

        curl_setopt_array($ch, [
            CURLOPT_URL            => $url,
            CURLOPT_RETURNTRANSFER => true,
            CURLOPT_FOLLOWLOCATION => true,
            CURLOPT_MAXREDIRS      => 5,
            CURLOPT_TIMEOUT        => 15,
            CURLOPT_CONNECTTIMEOUT => 5,
            // Certificate and host-name checks stay on unless explicitly disabled via config.
            CURLOPT_SSL_VERIFYPEER => $verifyTls,
            CURLOPT_SSL_VERIFYHOST => $verifyTls ? 2 : 0,
            CURLOPT_ENCODING       => 'gzip,deflate',
            CURLOPT_USERAGENT      => $this->getRandomUserAgent(),
            CURLOPT_HTTPHEADER     => [
                'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
                'Accept-Language: en-US,en;q=0.5',
                'Connection: keep-alive',
                'Upgrade-Insecure-Requests: 1',
                'Cache-Control: max-age=0'
            ],
            // Same file for reading and writing so cookies persist across requests.
            CURLOPT_COOKIEJAR      => $cookieFile,
            CURLOPT_COOKIEFILE     => $cookieFile,
        ]);

        $response = curl_exec($ch);
        $httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
        $error = curl_error($ch);
        curl_close($ch);

        // curl_exec() returns false on failure; never let that leak through the string return type.
        if (!is_string($response) || $httpCode !== 200 || $error !== '') {
            return '';
        }

        return $response;
    }

    /**
     * Parse web search results
     *
     * Collects URLs from three sources, in this order: the text of <cite>
     * elements, absolute href attributes (excluding Bing-internal hosts and
     * tracking links), and data-url attributes.
     *
     * @return string[] Unique URLs in first-seen order, at most RESULTS_PER_PAGE
     */
    private function parseWebResults(string $html): array {
        // URL => true; insertion order is preserved and duplicates collapse for free.
        $found = [];

        // Method 1: Parse <cite> tags
        if (preg_match_all('/<cite[^>]*>([^<]+)<\/cite>/i', $html, $matches)) {
            foreach ($matches[1] as $cite) {
                $cite = trim($this->decodeEntities(strip_tags($cite)));

                // Skip if it's not a valid URL pattern
                if ($cite === '' || strpos($cite, '.') === false) {
                    continue;
                }

                // Breadcrumb separators become path separators; remaining whitespace is dropped.
                $cite = preg_replace('/\s*›\s*/', '/', $cite);
                $cite = preg_replace('/\s+/', '', $cite);

                // Add protocol if missing. Test for the scheme itself, not just an
                // "http" prefix, so hosts such as "httpbin.org" are handled.
                if (!preg_match('#^https?://#i', $cite)) {
                    $cite = 'https://' . $cite;
                }

                if ($this->isValidUrl($cite)) {
                    $found[$cite] = true;
                }
            }
        }

        // Method 2: Parse direct links
        if (preg_match_all('/href="(https?:\/\/[^"]+)"/i', $html, $matches)) {
            foreach ($matches[1] as $url) {
                // Attribute values are HTML-escaped (e.g. "&amp;" between query parameters).
                $url = $this->decodeEntities($url);

                if ($this->isInternalUrl($url) || $this->isTrackingUrl($url)) {
                    continue;
                }

                if ($this->isValidUrl($url)) {
                    $found[$url] = true;
                }
            }
        }

        // Method 3: Parse data attributes (for dynamic content)
        if (preg_match_all('/data-url="([^"]+)"/i', $html, $matches)) {
            foreach ($matches[1] as $url) {
                $url = $this->decodeEntities($url);
                if ($this->isValidUrl($url)) {
                    $found[$url] = true;
                }
            }
        }

        return array_slice(array_keys($found), 0, self::RESULTS_PER_PAGE);
    }

    /**
     * Parse image search results
     *
     * Reads the `purl` field from JSON stored in m="..." attributes, then
     * falls back to entity-encoded `murl` and `purl` fragments in the raw HTML.
     *
     * @return string[] Unique URLs in first-seen order, at most RESULTS_PER_PAGE
     */
    private function parseImageResults(string $html): array {
        $found = [];

        // Parse the JSON metadata attribute
        if (preg_match_all('/m="([^"]+)"/i', $html, $matches)) {
            foreach ($matches[1] as $jsonData) {
                $data = json_decode($this->decodeEntities($jsonData), true);
                // Non-JSON attribute values decode to null and are skipped here.
                if (!is_array($data) || !is_string($data['purl'] ?? null)) {
                    continue;
                }
                if ($this->isValidUrl($data['purl'])) {
                    $found[$data['purl']] = true;
                }
            }
        }

        // Fallback: murl first, then purl, both taken from entity-encoded fragments
        foreach (['/murl&quot;:&quot;([^&]+)&/i', '/purl&quot;:&quot;([^&]+)&/i'] as $pattern) {
            if (!preg_match_all($pattern, $html, $matches)) {
                continue;
            }
            foreach ($matches[1] as $url) {
                $url = urldecode($url);
                if ($this->isValidUrl($url)) {
                    $found[$url] = true;
                }
            }
        }

        return array_slice(array_keys($found), 0, self::RESULTS_PER_PAGE);
    }

    /**
     * Parse video search results
     *
     * Collects absolute data-src URLs, links to youtube.com / vimeo.com /
     * dailymotion.com, and entity-encoded `vid_src` fragments.
     *
     * @return string[] Unique URLs in first-seen order, at most RESULTS_PER_PAGE
     */
    private function parseVideoResults(string $html): array {
        $found = [];

        // Parse data-src attributes that hold an absolute URL
        if (preg_match_all('/data-src="([^"]+)"/i', $html, $matches)) {
            foreach ($matches[1] as $data) {
                $data = $this->decodeEntities($data);
                if (strpos($data, 'http') === 0 && $this->isValidUrl($data)) {
                    $found[$data] = true;
                }
            }
        }

        // Parse video page URLs
        if (preg_match_all('/href="(https?:\/\/(?:www\.)?(youtube\.com|vimeo\.com|dailymotion\.com)[^"]+)"/i', $html, $matches)) {
            foreach ($matches[1] as $url) {
                $url = $this->decodeEntities($url);
                if ($this->isValidUrl($url)) {
                    $found[$url] = true;
                }
            }
        }

        // Parse general video links
        if (preg_match_all('/vid_src&quot;:&quot;([^&]+)&/i', $html, $matches)) {
            foreach ($matches[1] as $url) {
                $url = urldecode($url);
                if ($this->isValidUrl($url)) {
                    $found[$url] = true;
                }
            }
        }

        return array_slice(array_keys($found), 0, self::RESULTS_PER_PAGE);
    }

    /**
     * Decode HTML entities (named, numeric, and both quote styles) as UTF-8.
     */
    private function decodeEntities(string $value): string {
        return html_entity_decode($value, ENT_QUOTES | ENT_HTML5, 'UTF-8');
    }

    /**
     * Whether the string passes PHP's FILTER_VALIDATE_URL check.
     */
    private function isValidUrl(string $url): bool {
        return filter_var($url, FILTER_VALIDATE_URL) !== false;
    }

    /**
     * Whether the URL's host is one of INTERNAL_DOMAINS or a subdomain of one.
     *
     * Matching on the parsed host (rather than searching the whole URL) keeps
     * unrelated sites that merely mention these domains in a path or query.
     */
    private function isInternalUrl(string $url): bool {
        $host = parse_url($url, PHP_URL_HOST);
        if (!is_string($host) || $host === '') {
            return false;
        }

        $host = strtolower($host);
        foreach (self::INTERNAL_DOMAINS as $domain) {
            $suffix = '.' . $domain;
            if ($host === $domain || substr($host, -strlen($suffix)) === $suffix) {
                return true;
            }
        }

        return false;
    }

    /**
     * Whether the URL contains '/redirect' or 'click.' anywhere (heuristic tracking-link check).
     */
    private function isTrackingUrl(string $url): bool {
        return strpos($url, '/redirect') !== false
            || strpos($url, 'click.') !== false;
    }

    /**
     * Get random user agent
     */
    private function getRandomUserAgent(): string {
        return $this->userAgents[array_rand($this->userAgents)];
    }

    /**
     * Build search URL with all parameters
     *
     * Adds `mkt` and `freshness` from the configuration when they are non-empty.
     *
     * @param string $term Search term (encoded by http_build_query)
     * @param string $type Type: web, images, videos (anything else is treated as web)
     * @param int $page Page number (0-indexed; negative values are treated as 0)
     * @return string Absolute Bing URL
     */
    public function buildSearchUrl(string $term, string $type = 'web', int $page = 0): string {
        $baseUrls = [
            'web'    => 'https://www.bing.com/search',
            'images' => 'https://www.bing.com/images/search',
            'videos' => 'https://www.bing.com/videos/search',
        ];

        $params = [
            'q' => $term,
            'first' => $this->pageOffset($page),
            'count' => self::RESULTS_PER_PAGE,
        ];

        if (!empty($this->config['market'])) {
            $params['mkt'] = $this->config['market'];
        }

        if (!empty($this->config['freshness'])) {
            $params['freshness'] = $this->config['freshness'];
        }

        return ($baseUrls[$type] ?? $baseUrls['web']) . '?' . http_build_query($params);
    }

    /**
     * Get estimated total results for a term
     *
     * Fetches the first web result page and reads the first "<number> results"
     * figure, preferring the element with class "sb_count".
     *
     * @return int Parsed count with thousands separators removed, or 0 when
     *             the page cannot be fetched or no count is found
     */
    public function getEstimatedResults(string $term): int {
        $html = $this->fetchPage("https://www.bing.com/search?q=" . urlencode($term));
        if ($html === '') {
            return 0;
        }

        $patterns = [
            '/class="sb_count"[^>]*>([0-9,]+)\s*results?/i',
            '/(\d+(?:,\d+)*)\s*results?/i',
        ];

        foreach ($patterns as $pattern) {
            if (preg_match($pattern, $html, $match)) {
                return (int) str_replace(',', '', $match[1]);
            }
        }

        return 0;
    }
}
