import time
import json
import re
import random
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
import undetected_chromedriver as uc
import psycopg2
from datetime import datetime
from urllib.parse import urljoin, urlparse
import requests

class RealAliExpressParser:
    def __init__(self, db_config=None):
        """Start the browser and prepare database / HTTP settings.

        Args:
            db_config: Optional mapping of connection keyword arguments
                (the mapping is later unpacked into ``psycopg2.connect``).
                Keys given here override the built-in development defaults,
                so real credentials can be injected by the caller instead of
                being edited into the source.
        """
        self.setup_driver()

        # Development defaults; callers may override any of them via db_config.
        self.db_config = {
            'host': '127.0.0.1',
            'database': 'aliexpress_automation',
            'user': 'automation_user',
            'password': 'AutoPass123'
        }
        if db_config:
            self.db_config.update(db_config)

        self.base_url = "https://www.aliexpress.com"
        self.session = requests.Session()
        
    def setup_driver(self):
        """Create the Selenium driver and store it in ``self.driver``.

        An undetected-chromedriver instance is tried first. If creating it
        fails with an ordinary exception, the reason is printed and a regular
        Chrome driver (installed through webdriver-manager) is used instead.
        Errors raised while building the fallback driver propagate to the
        caller.
        """
        print("🔧 Настройка веб-драйвера...")

        options = uc.ChromeOptions()

        # Flags intended to make the automated browser harder to detect.
        for flag in (
            '--no-sandbox',
            '--disable-dev-shm-usage',
            '--disable-blink-features=AutomationControlled',
            '--disable-extensions',
            '--no-first-run',
            '--disable-default-apps',
            '--disable-infobars',
            '--window-size=1920,1080',
        ):
            options.add_argument(flag)

        # For production, headless mode can be enabled:
        # options.add_argument('--headless')

        try:
            self.driver = uc.Chrome(options=options)
            print("✅ Undetected Chrome драйвер инициализирован")
        except Exception as e:
            # `Exception` instead of a bare except: Ctrl+C / SystemExit must
            # abort start-up rather than silently trigger the fallback.
            print(f"⚠️ Не удалось запустить undetected Chrome: {e}")
            # Fall back to the regular Chrome driver.
            print("⚠️ Используем обычный Chrome драйвер")
            chrome_options = Options()
            chrome_options.add_argument('--no-sandbox')
            chrome_options.add_argument('--disable-dev-shm-usage')
            chrome_options.add_argument('--disable-blink-features=AutomationControlled')
            chrome_options.add_experimental_option("excludeSwitches", ["enable-automation"])
            chrome_options.add_experimental_option('useAutomationExtension', False)
            chrome_options.add_argument('--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36')

            service = Service(ChromeDriverManager().install())
            self.driver = webdriver.Chrome(service=service, options=chrome_options)
            # Hide the `navigator.webdriver` flag on the current document.
            self.driver.execute_script("Object.defineProperty(navigator, 'webdriver', {get: () => undefined})")
    
    def random_delay(self, min_sec=2, max_sec=5):
        """Sleep for a random duration drawn uniformly from [min_sec, max_sec] seconds."""
        time.sleep(random.uniform(min_sec, max_sec))
    
    def clean_price(self, price_text):
        """Extract the first price found in ``price_text`` as a float.

        Only the first numeric token is used, so a range such as
        ``"$12.50 - $15.00"`` yields 12.5 instead of fusing both numbers.
        Separator handling:

        * whitespace between 3-digit groups is a thousands separator
          (``"1 299,99"``);
        * when both ``.`` and ``,`` occur, the one that appears last is the
          decimal mark and the other is a thousands separator;
        * a separator that occurs more than once is a thousands separator;
        * a single comma followed by exactly three digits is a thousands
          separator (``"1,299"``) unless the integer part is ``0``;
        * otherwise a single separator is the decimal mark.

        Args:
            price_text: Raw text of a price element; may be empty or None.

        Returns:
            The parsed price as a float, or 0 when no price can be extracted.
        """
        if not price_text:
            return 0

        found = re.search(
            r'\d{1,3}(?:\s\d{3})+(?!\d)(?:[.,]\d+)?|\d[\d.,]*',
            str(price_text),
        )
        if not found:
            return 0

        # Drop grouping whitespace and any trailing punctuation ("12.").
        token = re.sub(r'\s', '', found.group()).rstrip('.,')

        last_dot = token.rfind('.')
        last_comma = token.rfind(',')

        if last_dot != -1 and last_comma != -1:
            # Both separators present: the right-most one is the decimal mark.
            if last_dot > last_comma:
                token = token.replace(',', '')
            else:
                token = token.replace('.', '').replace(',', '.')
        elif last_comma != -1:
            head, _, tail = token.rpartition(',')
            if token.count(',') > 1 or (len(tail) == 3 and head != '0'):
                token = token.replace(',', '')
            else:
                token = token.replace(',', '.')
        elif token.count('.') > 1:
            token = token.replace('.', '')

        try:
            return float(token)
        except ValueError:
            return 0
    
    def extract_number(self, text):
        """Return the first integer found in ``text``, or 0 if there is none.

        Spaces and commas are stripped first so that grouped digits such as
        ``"1,234"`` or ``"10 000"`` are read as a single number.
        """
        if not text:
            return 0

        # Remove grouping characters in one pass before searching for digits.
        compact = text.translate(str.maketrans('', '', ' ,'))
        found = re.search(r'\d+', compact)
        return int(found.group()) if found else 0
    
    def parse_search_page(self, search_query, max_products=10):
        """Collect product page URLs from an AliExpress search results page.

        Args:
            search_query: Free-text search phrase.
            max_products: Maximum number of product URLs to return.

        Returns:
            A list of unique product URLs with their query strings removed,
            at most ``max_products`` long. An empty list is returned when
            anything goes wrong while loading or scanning the page.
        """
        # Function-scope import keeps this method independent of the
        # file-level import block.
        from urllib.parse import quote_plus

        print(f"🔍 Парсинг поиска: {search_query}")

        try:
            # quote_plus still turns spaces into '+', and additionally escapes
            # characters such as '&', '#' and non-ASCII text that would
            # otherwise corrupt the query string.
            search_url = f"{self.base_url}/wholesale?SearchText={quote_plus(search_query)}"
            print(f"📍 Переход на: {search_url}")

            self.driver.get(search_url)
            self.random_delay(5, 8)

            # Wait for the results widget; scanning continues even if it
            # never shows up.
            try:
                WebDriverWait(self.driver, 15).until(
                    EC.presence_of_element_located((By.CSS_SELECTOR, "[data-widget-cid*='search']"))
                )
            except Exception:
                print("⚠️ Элементы поиска не загрузились, продолжаем...")

            print(f"📄 Текущая страница: {self.driver.title}")

            product_links = []

            # Candidate selectors for product links, tried in order; the
            # first selector that yields at least one product link wins.
            link_selectors = [
                "a[href*='/item/']",
                "a[href*='aliexpress.com/item']",
                "[data-widget-cid*='search'] a",
                ".search-item-card-wrapper a",
                ".list-item a"
            ]

            for selector in link_selectors:
                try:
                    elements = self.driver.find_elements(By.CSS_SELECTOR, selector)
                    print(f"🔗 Найдено {len(elements)} ссылок по селектору: {selector}")

                    for element in elements:
                        href = element.get_attribute('href')
                        if href and '/item/' in href and 'aliexpress.com' in href:
                            # Strip tracking parameters so duplicates compare equal.
                            clean_url = href.split('?')[0]
                            if clean_url not in product_links:
                                product_links.append(clean_url)

                                if len(product_links) >= max_products:
                                    break

                    if product_links:
                        break

                except Exception as e:
                    print(f"❌ Ошибка с селектором {selector}: {e}")
                    continue

            print(f"🎯 Найдено уникальных ссылок: {len(product_links)}")
            return product_links[:max_products]

        except Exception as e:
            print(f"❌ Ошибка при парсинге поиска: {e}")
            return []
    
    def _first_match(self, selectors, extract, first_only=False):
        """Return the first non-None value ``extract`` yields, else None.

        Selectors are tried in order. For each selector either only the first
        matching element (``first_only=True``) or every matching element is
        passed to ``extract``. A selector whose lookup or extraction raises is
        skipped and the next selector is tried.
        """
        for selector in selectors:
            try:
                if first_only:
                    elements = [self.driver.find_element(By.CSS_SELECTOR, selector)]
                else:
                    elements = self.driver.find_elements(By.CSS_SELECTOR, selector)

                for element in elements:
                    value = extract(element)
                    if value is not None:
                        return value
            except Exception:
                # Missing elements are expected on layout variants. Catching
                # `Exception` (not a bare except) lets Ctrl+C propagate.
                continue
        return None

    def parse_product_page(self, product_url):
        """Scrape a single product page.

        Args:
            product_url: URL of the product page to open.

        Returns:
            A dict with the keys ``aliexpress_link``, ``status``, ``title``,
            ``price``, ``original_price``, ``main_image``, ``rating``,
            ``orders_count`` and ``original_description``; fields that could
            not be found keep their defaults. ``None`` is returned if the
            page could not be processed at all.
        """
        print(f"📦 Парсинг товара: {product_url}")

        try:
            self.driver.get(product_url)
            self.random_delay(3, 6)

            product_data = {
                'aliexpress_link': product_url,
                'status': 'active',
                'title': "Товар с AliExpress",
                'price': 0,
                'original_price': None,
                'main_image': None,
                'rating': 0,
                'orders_count': 0,
                'original_description': "",
            }

            # --- Title ---
            def title_text(element):
                text = element.text.strip()
                return text if len(text) > 5 else None

            title = self._first_match(
                [
                    "h1",
                    "[data-pl='product-title']",
                    ".product-title-text",
                    ".product-name",
                    "[class*='title']",
                ],
                title_text,
                first_only=True,
            )
            if title is not None:
                product_data['title'] = title[:500]  # cap the stored length
                print(f"✅ Название: {title[:100]}...")

            # --- Current price ---
            def current_price(element):
                value = self.clean_price(element.text.strip())
                return value if value > 0 else None

            price = self._first_match(
                [
                    "[class*='price'][class*='current']",
                    "[class*='price-current']",
                    ".price-now",
                    ".product-price-current",
                    "[data-spm-anchor-id*='price']",
                ],
                current_price,
            )
            if price is not None:
                product_data['price'] = price
                print(f"✅ Цена: {price}")

            # --- Old (pre-discount) price ---
            def previous_price(element):
                value = self.clean_price(element.text.strip())
                # Only meaningful when it is higher than the current price.
                return value if value > product_data['price'] else None

            old_price = self._first_match(
                [
                    "[class*='price'][class*='original']",
                    "[class*='price-original']",
                    ".price-old",
                    ".product-price-del",
                ],
                previous_price,
                first_only=True,
            )
            if old_price is not None:
                product_data['original_price'] = old_price
                print(f"✅ Старая цена: {old_price}")

            # --- Main image ---
            def image_src(element):
                src = element.get_attribute('src')
                if src and ('alicdn' in src or 'aliexpress' in src):
                    return src
                return None

            main_image = self._first_match(
                [
                    ".product-image img",
                    ".images-view-item img",
                    "[class*='image'] img",
                    "img[src*='alicdn']",
                ],
                image_src,
                first_only=True,
            )
            if main_image is not None:
                product_data['main_image'] = main_image
                print(f"✅ Изображение найдено")

            # --- Rating ---
            def rating_value(element):
                found = re.search(r'(\d+\.?\d*)', element.text.strip())
                if found:
                    value = float(found.group())
                    if 0 <= value <= 5:
                        return value
                return None

            rating = self._first_match(
                [
                    "[class*='rating']",
                    ".star-view",
                    "[data-spm-anchor-id*='rating']",
                ],
                rating_value,
                first_only=True,
            )
            if rating is not None:
                product_data['rating'] = rating
                print(f"✅ Рейтинг: {rating}")

            # --- Orders count ---
            def orders_value(element):
                text = element.text.strip()
                lowered = text.lower()
                if 'sold' in lowered or 'order' in lowered:
                    count = self.extract_number(text)
                    if count > 0:
                        return count
                return None

            orders_count = self._first_match(
                [
                    "[class*='order']",
                    "[class*='sold']",
                    ".product-reviewer-reviews",
                ],
                orders_value,
            )
            if orders_count is not None:
                product_data['orders_count'] = orders_count
                print(f"✅ Заказов: {orders_count}")

            # --- Description ---
            def description_text(element):
                text = element.text.strip()
                return text if len(text) > 20 else None

            desc = self._first_match(
                [
                    ".product-description",
                    "[class*='description']",
                    ".detail-desc",
                    ".product-overview",
                ],
                description_text,
                first_only=True,
            )
            if desc is not None:
                product_data['original_description'] = desc[:1000]  # cap the stored length
                print(f"✅ Описание найдено ({len(desc)} символов)")

            return product_data

        except Exception as e:
            print(f"❌ Ошибка при парсинге товара {product_url}: {e}")
            return None
    
    def categorize_product(self, title, description):
        """Pick a product category from keywords in the title and description.

        Matching is a case-insensitive substring search. Categories are
        checked in declaration order and the first one with any matching
        keyword wins.

        Args:
            title: Product title; ``None`` is treated as an empty string.
            description: Product description; ``None`` is treated as an
                empty string.

        Returns:
            One of ``'electronics'``, ``'cooking'``, ``'comfort'``,
            ``'tools'``, ``'safety'``, ``'water'`` or ``'other'`` when
            nothing matches.
        """
        # Missing fields must not raise; scraped data can contain None.
        text = f"{title or ''} {description or ''}".lower()

        categories = {
            'electronics': [
                'solar', 'battery', 'power', 'charger', 'inverter', 'led', 'light',
                'солнечная', 'батарея', 'аккумулятор', 'зарядное', 'инвертор', 'освещение'
            ],
            'cooking': [
                'stove', 'cooker', 'pot', 'pan', 'fridge', 'refrigerator', 'cooking',
                'плитка', 'горелка', 'кастрюля', 'сковорода', 'холодильник', 'готовка'
            ],
            'comfort': [
                'mattress', 'pillow', 'sleeping', 'chair', 'table', 'tent', 'shelter',
                'матрас', 'подушка', 'спальник', 'кресло', 'стол', 'палатка', 'тент'
            ],
            'tools': [
                'tool', 'wrench', 'screwdriver', 'pump', 'compressor', 'jack',
                'инструмент', 'ключ', 'отвертка', 'насос', 'компрессор', 'домкрат'
            ],
            'safety': [
                'first aid', 'safety', 'alarm', 'lock', 'security', 'camera',
                'аптечка', 'безопасность', 'сигнализация', 'замок', 'камера'
            ],
            'water': [
                'water', 'filter', 'pump', 'tank', 'hose', 'shower', 'toilet',
                'вода', 'фильтр', 'насос', 'бак', 'шланг', 'душ', 'туалет'
            ]
        }

        for category, keywords in categories.items():
            if any(keyword in text for keyword in keywords):
                return category

        return 'other'
    
    def save_to_database(self, products):
        """Insert scraped products into the ``products`` table.

        A product whose ``aliexpress_link`` is already stored is skipped and
        reported as a duplicate. Every product is committed on its own and a
        failed product is rolled back, so one bad row cannot leave the
        transaction in an aborted state and take the other rows down with
        it. The cursor and connection are always closed.

        Args:
            products: Product dicts with at least ``aliexpress_link``,
                ``title`` and ``price``.

        Returns:
            ``{'added_count': int, 'errors': list[str]}`` where
            ``added_count`` is the number of committed rows and ``errors``
            holds duplicate notices, per-product failures and database-level
            failures.
        """
        if not products:
            return {'added_count': 0, 'errors': ['Нет товаров для сохранения']}

        added_count = 0
        errors = []
        conn = None
        cursor = None

        try:
            conn = psycopg2.connect(**self.db_config)
            cursor = conn.cursor()

            for product in products:
                try:
                    # Duplicate check by product link.
                    cursor.execute("""
                        SELECT id FROM products WHERE aliexpress_link = %s
                    """, (product['aliexpress_link'],))

                    if cursor.fetchone():
                        errors.append(f"Дубликат: {product['title']}")
                        continue

                    category = self.categorize_product(
                        product['title'],
                        product.get('original_description', '')
                    )

                    insert_query = """
                        INSERT INTO products (
                            title, original_description, price, original_price,
                            main_image, category, aliexpress_link, rating, orders_count,
                            status, created_at, updated_at
                        ) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
                        RETURNING id
                    """

                    # One timestamp so created_at and updated_at agree.
                    now = datetime.now()
                    cursor.execute(insert_query, (
                        product['title'],
                        product.get('original_description', ''),
                        product['price'],
                        product.get('original_price'),
                        product.get('main_image'),
                        category,
                        product['aliexpress_link'],
                        product.get('rating', 0),
                        product.get('orders_count', 0),
                        'active',
                        now,
                        now
                    ))

                    product_id = cursor.fetchone()[0]
                    # Commit per product so later failures cannot undo it.
                    conn.commit()
                    added_count += 1
                    print(f"✅ Добавлен товар #{product_id}: {product['title'][:50]}...")

                except Exception as e:
                    # After a failed statement PostgreSQL rejects everything
                    # until the transaction is rolled back.
                    conn.rollback()
                    title = product.get('title') if isinstance(product, dict) else None
                    errors.append(f"Ошибка добавления {title}: {str(e)}")

        except Exception as e:
            errors.append(f"Ошибка БД: {str(e)}")
        finally:
            # Release resources on every path; report (not raise) close errors.
            for resource in (cursor, conn):
                if resource is not None:
                    try:
                        resource.close()
                    except Exception as e:
                        errors.append(f"Ошибка БД: {str(e)}")

        return {
            'added_count': added_count,
            'errors': errors
        }
    
    def parse_and_save(self, search_queries, max_products_per_query=5):
        """Scrape every query and store everything that was collected.

        For each query the product links are gathered and each product page
        is parsed, with random pauses in between. A failure inside one query
        is printed and the next query is processed. All collected products
        are then handed to ``save_to_database`` in a single call.

        Args:
            search_queries: Iterable of search phrases.
            max_products_per_query: Link limit passed to the search step.

        Returns:
            The list of scraped product dicts (regardless of whether saving
            succeeded).
        """
        collected = []

        for query in search_queries:
            print(f"\n🔍 === Парсинг по запросу: {query} ===")

            try:
                links = self.parse_search_page(query, max_products_per_query)

                if not links:
                    print(f"⚠️ Товары по запросу '{query}' не найдены")
                    continue

                total = len(links)
                for position, link in enumerate(links, start=1):
                    print(f"\n📦 Товар {position}/{total}")
                    parsed = self.parse_product_page(link)

                    if not parsed:
                        print(f"❌ Не удалось спарсить товар: {link}")
                    else:
                        collected.append(parsed)
                        print(f"✅ Товар спарсен: {parsed['title'][:50]}...")

                    # Pause between product pages.
                    self.random_delay(2, 4)

                # Longer pause between search queries.
                self.random_delay(5, 8)

            except Exception as error:
                print(f"❌ Ошибка при парсинге запроса '{query}': {error}")

        if not collected:
            return collected

        print(f"\n💾 === Сохранение {len(collected)} товаров ===")
        outcome = self.save_to_database(collected)
        print(f"✅ Добавлено товаров: {outcome['added_count']}")

        if outcome['errors']:
            print("⚠️ Ошибки:")
            # Only the first five errors are shown.
            for problem in outcome['errors'][:5]:
                print(f"  - {problem}")

        return collected
    
    def close(self):
        """Quit the web driver if this instance has one."""
        if not hasattr(self, 'driver'):
            return

        self.driver.quit()
        print("🔒 Веб-драйвер закрыт")

def main():
    """Run the scraper for a fixed set of camper-related search queries.

    The browser is always closed at the end, whether the run finishes,
    is interrupted from the keyboard, or fails with an error.
    """
    parser = RealAliExpressParser()

    # Search phrases for camper / RV equipment.
    search_queries = [
        "camping solar panel 100w",
        "rv lithium battery 12v",
        "portable camping stove gas"
    ]

    try:
        collected = parser.parse_and_save(search_queries, max_products_per_query=3)
        print(f"\n🎉 Парсинг завершен! Всего товаров: {len(collected)}")
    except KeyboardInterrupt:
        print("\n⏹️ Парсинг прерван пользователем")
    except Exception as error:
        print(f"\n💥 Критическая ошибка: {error}")
    finally:
        parser.close()

# Run the scraper only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
