import requests
import zipfile
import os
import pandas as pd
from datetime import datetime, timedelta
import dolphindb as ddb
import numpy as np
import csv
import logging
from datetime import datetime
from ..configs import DDB,HIS_CONFIG,BINANCE_BASE_CONFIG
def setup_logging():
    """Configure logging to a timestamped file and the console.

    The log directory is read from ``HIS_CONFIG["LOG_DIR"]`` and created if
    it does not exist. A new log file named
    ``binance_import_<YYYYmmdd_HHMMSS>.log`` is opened on every call.

    Returns:
        The logger named after this module.
    """
    target_dir = HIS_CONFIG["LOG_DIR"]
    os.makedirs(target_dir, exist_ok=True)

    # One log file per call, distinguished by the current local time.
    stamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    log_path = os.path.join(target_dir, f"binance_import_{stamp}.log")

    file_handler = logging.FileHandler(log_path, encoding='utf-8')
    console_handler = logging.StreamHandler()

    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s - %(levelname)s - %(message)s',
        handlers=[file_handler, console_handler],
    )
    return logging.getLogger(__name__)

class BinanceDownloader:
    """Download Binance daily aggTrades dumps and append them to DolphinDB.

    Each (symbol, day) pair is downloaded as a zip from data.binance.vision,
    extracted to a local CSV, converted to the target trade schema and
    appended to a DolphinDB table in batches.
    """

    # Names assigned positionally to the first seven CSV fields.
    _AGG_TRADE_COLUMNS = (
        'aggTradeId', 'price', 'qty', 'firstId', 'lastId', 'time', 'is_buyer_maker',
    )

    def __init__(self, proxy=None):
        """Set up logging and open the DolphinDB session.

        Args:
            proxy: Value forwarded as ``proxies`` to ``requests.get`` for
                every download; ``None`` means no proxy.
        """
        # Initialize logger
        self.logger = setup_logging()

        self.proxies = proxy

        self.ddb = ddb.session()
        self.ddb.connect(DDB["HOST"], DDB["PORT"], DDB["USER"], DDB["PWD"])

        self.logger.info(f"Proxy settings: {self.proxies}")
        self.logger.info("DolphinDB connected")

    def download_file(self, accountType, symbol, date_str, save_dir=None):
        """Download and extract one daily aggTrades archive.

        Args:
            accountType: ``"um"``, ``"cm"`` or ``"spot"``.
            symbol: Trading pair, e.g. ``"BTCUSDT"``.
            date_str: Day in ``YYYY-MM-DD`` form, as used in the file name.
            save_dir: Directory for the extracted CSV. Defaults to
                ``HIS_CONFIG["SAVE_DIR"]``, resolved at call time.

        Returns:
            Path of the extracted (or already present) CSV file.

        Raises:
            ValueError: If ``accountType`` is not supported.
            requests.HTTPError: If the server answers with an error status.
        """
        if save_dir is None:
            save_dir = HIS_CONFIG["SAVE_DIR"]
        save_dir = os.path.abspath(save_dir)
        os.makedirs(save_dir, exist_ok=True)

        filename = f"{symbol}-aggTrades-{date_str}.zip"
        if accountType in ("um", "cm"):
            # Both futures flavours share the same URL layout.
            url = f"https://data.binance.vision/data/futures/{accountType}/daily/aggTrades/{symbol}/{filename}"
        elif accountType == "spot":
            url = f"https://data.binance.vision/data/{accountType}/daily/aggTrades/{symbol}/{filename}"
        else:
            raise ValueError(f"Unsupported account type: {accountType}. Supported: um/cm/spot")
        csv_path = os.path.join(save_dir, f"{symbol}-aggTrades-{date_str}.csv")

        if os.path.exists(csv_path):
            self.logger.info(f"File already exists: {csv_path}")
            return csv_path

        self.logger.info(f"Starting download: {filename}")
        response = requests.get(url, proxies=self.proxies, timeout=30)
        response.raise_for_status()

        zip_path = os.path.join(save_dir, filename)
        try:
            with open(zip_path, 'wb') as f:
                f.write(response.content)

            with zipfile.ZipFile(zip_path, 'r') as zip_ref:
                zip_ref.extractall(save_dir)
        finally:
            # Never leave the archive behind, even when extraction fails.
            if os.path.exists(zip_path):
                os.remove(zip_path)

        self.logger.info(f"Download complete: {csv_path}")
        return csv_path

    @staticmethod
    def _is_numeric(text):
        """Return True when ``text`` parses as a float."""
        try:
            float(text.strip())
        except ValueError:
            return False
        return True

    @staticmethod
    def _has_header(csv_path):
        """Guess whether the CSV starts with a header row.

        The first two lines are compared field by field; if any field is
        numeric in one line but not in the other, the first line is taken
        to be a header.
        """
        with open(csv_path, 'r') as f:
            first_line = f.readline().strip()
            second_line = f.readline().strip()

        first_row, second_row = csv.reader([first_line, second_line])
        numeric = BinanceDownloader._is_numeric
        return any(
            numeric(a) != numeric(b) for a, b in zip(first_row, second_row)
        )

    def parse_csv_to_dataframe(self, csv_path, accountType, symbol):
        """Parse an aggTrades CSV into the target trade schema.

        Args:
            csv_path: Path of the extracted CSV file.
            accountType: ``"spot"`` and ``"um"`` map to their own exchange
                labels; anything else is labelled ``"Binance-CM"``.
            symbol: Stored verbatim in the ``instrument`` column.

        Returns:
            A DataFrame with columns eventTime, collectionTime, exchange,
            instrument, price, qty, count, side, tradeId, firstId, lastId;
            ``None`` if parsing fails for any reason (the error is logged).
        """
        try:
            if self._has_header(csv_path):
                self.logger.info("Auto-detect: file contains header")
                df = pd.read_csv(csv_path, header=0)
            else:
                self.logger.info("Auto-detect: file does not contain header")
                df = pd.read_csv(csv_path, header=None)

            # Some dumps carry extra trailing fields (spot files add a
            # best-price-match flag per Binance's public-data docs); only the
            # leading seven fields are used.
            expected = len(self._AGG_TRADE_COLUMNS)
            if df.shape[1] < expected:
                raise ValueError(
                    f"expected at least {expected} columns, got {df.shape[1]}"
                )
            df = df.iloc[:, :expected].copy()
            df.columns = list(self._AGG_TRADE_COLUMNS)

            if accountType == "spot":
                exchange_name = 'Binance-Spot'
            elif accountType == "um":
                exchange_name = 'Binance-Futures'
            else:
                exchange_name = "Binance-CM"

            # Values above 1e14 are treated as microseconds and scaled down to
            # milliseconds; the decision is made from the first row only.
            divisor = 1000 if df['time'].iloc[0] > 1e14 else 1
            # NOTE(review): every timestamp is shifted by +1 ms; the reason is
            # not visible in this file - confirm it is intentional.
            event_time = (
                pd.to_datetime(df['time'] / divisor, unit='ms')
                + pd.Timedelta(milliseconds=1)
            )

            # read_csv turns true/false text into real booleans, and the raw
            # text casing varies, so normalise before comparing.
            is_maker = (
                df['is_buyer_maker'].astype(str).str.strip().str.lower() == 'true'
            )
            side = is_maker.map({True: 'sell', False: 'buy'})

            # Convert to target format
            result_df = pd.DataFrame({
                'eventTime': event_time,
                'collectionTime': pd.NaT,
                'exchange': exchange_name,
                'instrument': symbol,
                'price': df['price'].astype(float),
                'qty': df['qty'].astype(float),
                'count': pd.array([pd.NA] * len(df), dtype='Int64'),
                'side': side,
                'tradeId': df['aggTradeId'].astype('int64'),
                'firstId': df['firstId'].astype('int64'),
                'lastId': df['lastId'].astype('int64'),
            })

            self.logger.info(f"CSV parsed successfully: {csv_path}, rows: {len(result_df)}")
            return result_df

        except Exception as e:
            self.logger.error(f"CSV parsing failed {csv_path}: {str(e)}")
            return None

    def import_to_db(self, df, db_path, table_name, batch_size=10000):
        """Append a DataFrame to a DolphinDB table in batches.

        Args:
            df: Frame to append; ``None`` or empty frames are skipped.
            db_path: DolphinDB database path passed to ``loadTable``.
            table_name: Table name passed to ``loadTable``.
            batch_size: Maximum number of rows uploaded per round trip.

        Returns:
            Total number of rows appended across all batches, or 0 when the
            frame is empty or any step fails (the error is logged).
        """
        try:
            if df is None or df.empty:
                self.logger.warning("DataFrame is empty, skipping import")
                return 0
            if batch_size <= 0:
                raise ValueError("batch_size must be a positive integer")

            # The script is identical for every batch, so build it once.
            script = f'''
                    tb = loadTable("{db_path}", "{table_name}");
                    tb.append!(batch_data);
                    batch_data.size()
                    '''
            total_records = 0
            for start in range(0, len(df), batch_size):
                # Upload batch data
                self.ddb.upload({'batch_data': df.iloc[start:start + batch_size]})

                # Execute insertion; the script evaluates to the batch size
                total_records += self.ddb.run(script)

                # Clear batch data on the server
                self.ddb.run("undef(`batch_data)")

            self.logger.info(f"Total records imported successfully: {total_records}")
            return total_records

        except Exception as e:
            self.logger.error(f"Database import failed: {str(e)}")
            return 0

    def process_single_file(self, accountType, symbol, date_str, db_path, table_name,
                            raise_on_error=False):
        """Download, parse and import one (symbol, day) file.

        The local CSV is removed once parsing and import have been attempted,
        whether or not they succeeded.

        Args:
            accountType: ``"um"``, ``"cm"`` or ``"spot"``.
            symbol: Trading pair to process.
            date_str: Day in ``YYYY-MM-DD`` form.
            db_path: DolphinDB database path.
            table_name: DolphinDB table name.
            raise_on_error: When True, failures propagate to the caller
                instead of being logged and turned into a 0 return.

        Returns:
            Number of rows imported; 0 on failure unless ``raise_on_error``.
        """
        try:
            # Download file
            csv_path = self.download_file(accountType, symbol, date_str)
            try:
                # Parse CSV
                df = self.parse_csv_to_dataframe(csv_path, accountType, symbol)
                if df is None:
                    raise RuntimeError(f"CSV parsing failed: {csv_path}")

                # Import to DB. import_to_db reports failure as 0, which a
                # non-empty frame can only produce when the append failed.
                records = self.import_to_db(df, db_path, table_name)
                if records == 0 and not df.empty:
                    raise RuntimeError(
                        f"Database import failed: {db_path}/{table_name}"
                    )
            finally:
                # Delete local file
                if os.path.exists(csv_path):
                    os.remove(csv_path)
                    self.logger.info(f"Local file deleted: {csv_path}")

            return records

        except Exception as e:
            if raise_on_error:
                raise
            self.logger.error(f"File processing failed {symbol} {date_str}: {str(e)}")
            return 0

    def run(self, accountType, symbols, start_date, end_date, db_path, table_name):
        """Process every symbol for every day in ``[start_date, end_date]``.

        Args:
            accountType: ``"um"``, ``"cm"`` or ``"spot"``.
            symbols: Iterable of trading pairs.
            start_date: First day (inclusive), a ``datetime``.
            end_date: Last day (inclusive), a ``datetime``.
            db_path: DolphinDB database path.
            table_name: DolphinDB table name.

        Returns:
            Dict with ``total_records``, ``processed_files`` (successful
            files only) and ``failed_files``.
        """
        self.logger.info(f"Start processing: {symbols}, trade data from {start_date} to {end_date}")

        current_date = start_date
        total_records = 0
        processed_files = 0
        failed_files = 0

        while current_date <= end_date:
            date_str = current_date.strftime("%Y-%m-%d")

            for symbol in symbols:
                try:
                    # Let failures propagate so they are counted here rather
                    # than being reported as a successful file with 0 rows.
                    records = self.process_single_file(
                        accountType, symbol, date_str, db_path, table_name,
                        raise_on_error=True,
                    )
                    total_records += records
                    processed_files += 1

                    self.logger.info(f"Completed: {symbol} {date_str}, records: {records}")

                except Exception as e:
                    failed_files += 1
                    self.logger.error(f"Processing failed {symbol} {date_str}: {str(e)}")

            current_date += timedelta(days=1)

        self.logger.info("All done!")
        self.logger.info(f"Successfully processed: {processed_files} files")
        self.logger.info(f"Failed: {failed_files} files")
        self.logger.info(f"Total records: {total_records}")

        return {
            'total_records': total_records,
            'processed_files': processed_files,
            'failed_files': failed_files
        }

# Usage
if __name__ == "__main__":
    # Proxy configuration: the same address serves both schemes.
    # A literal alternative would look like
    #   {'http': 'http://127.0.0.1:7890', 'https': 'http://127.0.0.1:7890'}
    proxy_address = BINANCE_BASE_CONFIG["PROXY"]
    proxy = dict.fromkeys(('http', 'https'), proxy_address)

    # Job parameters
    accountType = "um"  # Supported: um/cm/spot
    symbols = [
        "BTCUSDT", "ETHUSDT", "ADAUSDT",
        "ALGOUSDT", "BNBUSDT", "FETUSDT",
        "GRTUSDT", "LTCUSDT", "XRPUSDT",
    ]
    start_date, end_date = datetime(2025, 8, 11), datetime(2025, 8, 12)
    db_path, table_name = 'dfs://CryptocurrencyTick', "trade"

    # Run the download/import job and report the summary it returns.
    downloader = BinanceDownloader(proxy)
    result = downloader.run(accountType, symbols, start_date, end_date, db_path, table_name)

    print(f"\nFinal statistics:")
    for label, key in (
        ("Total records", 'total_records'),
        ("Successful files", 'processed_files'),
        ("Failed files", 'failed_files'),
    ):
        print(f"{label}: {result[key]}")
