import requests
from bs4 import BeautifulSoup
import json
import re
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.options import Options
import time
import logging

class AliExpressParser:
    def __init__(self, use_selenium=True):
        self.use_selenium = use_selenium
        self.session = requests.Session()
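        # Use a desktop browser User-Agent: the default requests UA is often
        # served a stripped-down page or blocked outright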
        self.session.headers.update({
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
        })
        
        if self.use_selenium:
            self.setup_selenium()
    
    def setup_selenium(self):
        """Настройка Selenium WebDriver"""
        chrome_options = Options()
        chrome_options.add_argument('--headless')
        chrome_options.add_argument('--no-sandbox')
        chrome_options.add_argument('--disable-dev-shm-usage')
        chrome_options.add_argument('--disable-gpu')
        chrome_options.add_argument('--window-size=1920,1080')
        chrome_options.add_argument('--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36')
        
        try:
            self.driver = webdriver.Chrome(options=chrome_options)
            self.wait = WebDriverWait(self.driver, 10)
        except Exception as e:
            logging.error(f"Ошибка инициализации Selenium: {e}")
            self.use_selenium = False
    
    def parse_product(self, url):
        """Основной метод парсинга товара"""
        try:
            if self.use_selenium:
                return self._parse_with_selenium(url)
            else:
                return self._parse_with_requests(url)
        except Exception as e:
            logging.error(f"Ошибка парсинга: {e}")
            return None
    
    def _parse_with_selenium(self, url):
        """Парсинг с использованием Selenium"""
        try:
            self.driver.get(url)
            time.sleep(3)
            
            # Ждем загрузки основных элементов
            self.wait.until(EC.presence_of_element_located((By.TAG_NAME, "body")))
            
            # Получаем HTML после загрузки JavaScript
            soup = BeautifulSoup(self.driver.page_source, 'html.parser')
            
            return self._extract_data(soup, url)
            
        except Exception as e:
            logging.error(f"Ошибка Selenium парсинга: {e}")
            return None
    
    def _parse_with_requests(self, url):
        """Парсинг с использованием requests"""
        try:
            response = self.session.get(url, timeout=10)
            response.raise_for_status()
            
            soup = BeautifulSoup(response.content, 'html.parser')
            return self._extract_data(soup, url)
            
        except Exception as e:
            logging.error(f"Ошибка requests парсинга: {e}")
            return None
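
    # Transient network errors are common when scraping; a minimal retry
    # sketch (not wired into the flow above; _get_with_retries and its
    # attempts/backoff parameters are illustrative additions):
    def _get_with_retries(self, url, attempts=3, backoff=2):
        """Fetch a URL, retrying transient failures with a linear delay."""
        for attempt in range(attempts):
            try:
                response = self.session.get(url, timeout=10)
                response.raise_for_status()
                return response
            except requests.RequestException:
                if attempt == attempts - 1:
                    raise  # give up after the last attempt
                time.sleep(backoff * (attempt + 1))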
    
    def _extract_data(self, soup, url):
        """Извлечение данных из HTML"""
        result = {
            "title": "",
            "price": 0,
            "aliexpress_link": url,
            "category": "",
            "rating": 0,
            "orders_count": 0,
            "images": [],
            "ai_description": ""
        }
        
        # Product title
        title_selectors = [
            'h1[data-pl="product-title"]',
            'h1.product-title-text',
            'h1[class*="title"]',
            '.product-title h1',
            'h1'
        ]
        
        result["title"] = self._extract_text_by_selectors(soup, title_selectors)
        
        # Price
        price_selectors = [
            '.product-price-current',
            '[class*="price-current"]',
            '.price-current',
            '[data-spm-anchor-id*="price"]',
            '.price'
        ]
        
        price_text = self._extract_text_by_selectors(soup, price_selectors)
        result["price"] = self._parse_price(price_text)
        
        # Rating
        rating_selectors = [
            '.overview-rating-average',
            '[class*="rating-average"]',
            '.rating-average',
            '.star-rating'
        ]
        
        rating_text = self._extract_text_by_selectors(soup, rating_selectors)
        result["rating"] = self._parse_rating(rating_text)
        
        # Order count
        orders_selectors = [
            '.product-reviewer-reviews',
            '[class*="sold"]',
            '.sold-count',
            '.order-count'
        ]
        
        orders_text = self._extract_text_by_selectors(soup, orders_selectors)
        result["orders_count"] = self._parse_orders_count(orders_text)
        
        # Category
        category_selectors = [
            '.breadcrumb a',
            '.nav-breadcrumb a',
            '[class*="breadcrumb"] a'
        ]
        
        result["category"] = self._extract_category(soup, category_selectors)
        
        # Product images
        result["images"] = self._extract_images(soup)
        
        # Template-based description
        result["ai_description"] = self._generate_ai_description(result)
        
        return result
    
    def _extract_text_by_selectors(self, soup, selectors):
        """Извлечение текста по списку CSS селекторов"""
        for selector in selectors:
            try:
                element = soup.select_one(selector)
                if element:
                    return element.get_text(strip=True)
            except Exception:
                # select_one can raise on malformed selectors; try the next one
                continue
        return ""
    
    def _parse_price(self, price_text):
        """Парсинг цены из текста"""
        if not price_text:
            return 0
        
        # Strip thousands separators, then pull out the first decimal number
        numbers = re.findall(r'\d+\.?\d*', price_text.replace(',', ''))
        if numbers:
            try:
                return float(numbers[0])
            except ValueError:
                return 0
        return 0
    
    def _parse_rating(self, rating_text):
        """Парсинг рейтинга из текста"""
        if not rating_text:
            return 0
        
        # Look for a number between 0 and 5
        match = re.search(r'([0-4]\.?\d*|5\.?0*)', rating_text)
        if match:
            try:
                return float(match.group(1))
            except ValueError:
                return 0
        return 0
    
    def _parse_orders_count(self, orders_text):
        """Парсинг количества заказов из текста"""
        if not orders_text:
            return 0
        
        # Match a number, optionally followed by k/K for thousands
        match = re.search(r'(\d+(?:,\d+)*(?:\.\d+)?)\s*[kK]?', orders_text)
        if match:
            try:
                number = float(match.group(1).replace(',', ''))
                if 'k' in orders_text.lower():
                    number *= 1000
                return int(number)
            except ValueError:
                return 0
        return 0
    
    def _extract_category(self, soup, selectors):
        """Извлечение категории товара"""
        for selector in selectors:
            try:
                elements = soup.select(selector)
                if elements and len(elements) > 1:
                    # Take the next-to-last breadcrumb entry
                    # (the last one is usually the product itself)
                    return elements[-2].get_text(strip=True)
            except Exception:
                continue
        return "unknown"
    
    def _extract_images(self, soup):
        """Извлечение изображений товара"""
        images = []
        
        # Selectors covering several AliExpress page layouts
        image_selectors = [
            'img[class*="image-item"]',
            '.image-item img',
            '.product-image img',
            '.gallery-image img',
            'img[src*="alicdn"]'
        ]
        
        for selector in image_selectors:
            try:
                img_elements = soup.select(selector)
                for img in img_elements:
                    src = img.get('src') or img.get('data-src')
                    if src and src.startswith('//'):
                        src = 'https:' + src  # AliExpress often uses protocol-relative URLs
                    if src and src.startswith('http'):
                        # Strip the thumbnail size suffix commonly appended after
                        # the real extension (e.g. "....jpg_640x640.jpg") to get
                        # the full-size image
                        clean_src = re.sub(r'_\d+x\d+\w*\.(?:jpg|jpeg|png|webp)$', '', src)
                        if clean_src not in images:
                            images.append(clean_src)
                
                if images:
                    break
            except Exception:
                continue
        
        return images[:10]  # Cap the list at 10 images
    
    def _generate_ai_description(self, product_data):
        """Генерация AI описания товара"""
        # Простое описание на основе доступных данных
        title = product_data.get("title", "")
        price = product_data.get("price", 0)
        rating = product_data.get("rating", 0)
        orders = product_data.get("orders_count", 0)
        category = product_data.get("category", "")
        
        description = f"Товар '{title}' из категории '{category}' по цене ${price:.2f}."
        
        if rating > 0:
            description += f" Рейтинг: {rating} звезд."
        
        if orders > 0:
            description += f" Уже заказано: {orders} раз."
        
        return description
    
    def close(self):
        """Закрытие драйвера"""
        if self.use_selenium and hasattr(self, 'driver'):
            self.driver.quit()

def parse_aliexpress_product(url):
    """Функция для парсинга товара AliExpress"""
    parser = AliExpressParser(use_selenium=True)
    
    try:
        result = parser.parse_product(url)
        return result
    finally:
        parser.close()
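
# The wrapper above starts and tears down a browser for every call. When
# parsing many products, reusing one parser instance is much cheaper; a
# minimal sketch using contextlib.closing to guarantee cleanup (parse_many
# is an illustrative helper, not part of the original flow):
from contextlib import closing

def parse_many(urls):
    """Parse several product URLs with one shared parser instance."""
    results = []
    with closing(AliExpressParser(use_selenium=True)) as parser:
        for url in urls:
            data = parser.parse_product(url)
            if data:
                results.append(data)
    return results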

# Example usage
if __name__ == "__main__":
    # Test URL
    test_url = "https://pl.aliexpress.com/item/1005007191149791.html"
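    # Expected result shape (field values illustrative):
    # {"title": "...", "price": 12.99, "aliexpress_link": test_url,
    #  "category": "...", "rating": 4.8, "orders_count": 5000,
    #  "images": ["https://..."], "ai_description": "..."}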
    
    print("Парсинг товара AliExpress...")
    result = parse_aliexpress_product(test_url)
    
    if result:
        print(json.dumps(result, indent=2, ensure_ascii=False))
    else:
        print("Ошибка при парсинге товара")