import binascii
import logging
import multiprocessing
from functools import partial

# Configure the root logger once, at import time: records at INFO level and
# above are emitted, each formatted as "<timestamp> - <level> - <message>".
# No file or stream is given, so basicConfig's default handler is used.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

def read_file_content(file_path):
    """Return the entire contents of ``file_path`` as raw bytes.

    An INFO record naming the path is logged before the file is opened.
    The file is opened in binary mode, so no text decoding takes place.
    Any error raised by ``open`` or ``read`` propagates to the caller.
    """
    logging.info(f"Reading file content from {file_path}")
    with open(file_path, 'rb') as handle:
        raw_bytes = handle.read()
    return raw_bytes

def hexlify_content(file_content):
    """Return the hexadecimal text representation of ``file_content``.

    The bytes-like input is passed through ``binascii.hexlify`` and the
    resulting ASCII bytes are decoded to ``str``; every input byte becomes
    two hex characters. An INFO record is logged before the conversion.
    """
    logging.info("Converting file content to hexadecimal")
    hex_bytes = binascii.hexlify(file_content)
    hex_text = hex_bytes.decode('utf-8')
    return hex_text

def apply_transformations(segment, candidate_bytes):
    """Combine each byte of ``segment`` with the byte at the same index of
    ``candidate_bytes`` in three ways.

    Returns a list with one dict per byte of ``segment``. Each dict holds:

    * ``"addition_mod"``    -- (candidate + original) modulo 256
    * ``"xor"``             -- candidate XOR original
    * ``"subtraction_mod"`` -- (candidate - original) modulo 256

    ``candidate_bytes`` is indexed by position, so it must be at least as
    long as ``segment``; a shorter candidate raises ``IndexError``. Extra
    trailing candidate bytes are ignored.
    """
    def _combine(original_byte, candidate_byte):
        # One result record for a single (original, candidate) byte pair.
        return {
            "addition_mod": (candidate_byte + original_byte) % 256,
            "xor": candidate_byte ^ original_byte,
            "subtraction_mod": (candidate_byte - original_byte) % 256
        }

    return [
        _combine(original_byte, candidate_bytes[position])
        for position, original_byte in enumerate(segment)
    ]

def verify_candidate(segment, transformations):
    """Check every transformation record against values derived from
    ``segment`` alone.

    For the record at index ``i`` the check passes when at least one of
    the following holds:

    * ``"addition_mod"`` equals ``segment[i]`` doubled, modulo 256
    * ``"xor"`` equals ``segment[i] ^ segment[i]`` (always 0)
    * ``"subtraction_mod"`` equals ``segment[i] - segment[i]`` (always 0)

    Returns ``False`` at the first record that satisfies none of them and
    ``True`` otherwise (including when ``transformations`` is empty).

    NOTE(review): for records built as (candidate op segment) on single
    byte values, each of the three conditions is true exactly when the
    candidate byte equals the segment byte, so this acts as an equality
    test -- confirm that is the intended behaviour.
    """
    for position, outcome in enumerate(transformations):
        # Conditions are tried in the same order as before; the first
        # match accepts this position and moves on to the next one.
        if outcome["addition_mod"] == (segment[position] + segment[position]) % 256:
            continue
        if outcome["xor"] == (segment[position] ^ segment[position]):
            continue
        if outcome["subtraction_mod"] == (segment[position] - segment[position]) % 256:
            continue
        return False
    return True

def process_segment(segment, file_content, segment_size):
    """Slide a ``segment_size``-byte window over ``file_content`` and count
    the windows accepted for ``segment``.

    Each window is passed to ``apply_transformations`` together with
    ``segment``; the result is then judged by ``verify_candidate``.

    Args:
        segment: reference bytes the windows are compared against.
        file_content: bytes object to scan (must support slicing and the
            slices must provide ``.hex()``).
        segment_size: width of the sliding window in bytes.

    Returns:
        A dict mapping the hex string of each accepted window to the
        number of times it was accepted. Empty when ``file_content`` is
        shorter than ``segment_size``.
    """
    potential_mcuids = {}
    # "+ 1" makes the scan include the last full window, i.e. the one that
    # ends exactly at the end of the content. Without it that window (and
    # therefore the file's final segment itself) was never examined.
    window_count = len(file_content) - segment_size + 1
    for start in range(window_count):
        candidate = file_content[start:start + segment_size]
        transformations = apply_transformations(segment, candidate)
        if verify_candidate(segment, transformations):
            candidate_hex = candidate.hex()
            potential_mcuids[candidate_hex] = potential_mcuids.get(candidate_hex, 0) + 1
    return potential_mcuids

def analyze_segments(file_content, segment_indices, segment_size):
    """Run ``process_segment`` for a group of segments and merge the counts.

    Segment number ``i`` covers bytes ``i * segment_size`` up to (but not
    including) ``(i + 1) * segment_size`` of ``file_content``. A segment in
    which more than half of ``segment_size`` bytes are zero is skipped.

    Args:
        file_content: bytes object being analysed.
        segment_indices: iterable of segment numbers to process.
        segment_size: length of one segment in bytes.

    Returns:
        A dict mapping candidate hex strings to their summed counts over
        all processed segments.
    """
    totals = {}
    zero_limit = segment_size / 2
    for index in segment_indices:
        start = index * segment_size
        segment = file_content[start:start + segment_size]
        if segment.count(0) <= zero_limit:
            per_segment = process_segment(segment, file_content, segment_size)
            for candidate_hex, count in per_segment.items():
                totals[candidate_hex] = totals.get(candidate_hex, 0) + count
    return totals

def analyze_file(file_path, segment_size=8, log_interval=10000, batch_size=10000):
    """Analyse a binary file for recurring ``segment_size``-byte segments.

    The file is split into consecutive, non-overlapping segments. The
    segment numbers are grouped into batches of ``batch_size`` and every
    batch is handed to ``analyze_segments`` in a worker process.

    Args:
        file_path: path of the file to analyse.
        segment_size: segment length in bytes; must be at least 1.
        log_interval: currently unused; kept so existing callers that
            pass it keep working.
        batch_size: number of segments per worker task; must be at
            least 1.

    Returns:
        A dict with three keys:

        * ``"first_1000_chars_hex"`` -- hex text of the start of the file,
          at most 1000 characters.
        * ``"sorted_candidates"`` -- list of ``(candidate_hex, count)``
          tuples ordered by descending count.
        * ``"total_segments"`` -- number of whole segments in the file.

    Raises:
        ValueError: if ``segment_size`` or ``batch_size`` is less than 1.
    """
    # Fail fast: a zero segment size would divide by zero below, and
    # negative sizes would silently produce an empty analysis.
    if segment_size < 1:
        raise ValueError(f"segment_size must be a positive integer, got {segment_size!r}")
    if batch_size < 1:
        raise ValueError(f"batch_size must be a positive integer, got {batch_size!r}")

    file_content = read_file_content(file_path)
    # Only 1000 hex characters are reported and each byte yields two of
    # them, so converting the first 500 bytes gives the same preview
    # without hexlifying the whole file.
    hex_preview = hexlify_content(file_content[:500])

    num_segments = len(file_content) // segment_size

    logging.info(f"Analyzing file with {num_segments} segments of size {segment_size}")

    segment_batches = [
        range(first, min(first + batch_size, num_segments))
        for first in range(0, num_segments, batch_size)
    ]

    potential_mcuids = {}
    with multiprocessing.Pool() as pool:
        # Submit every batch before waiting on any of them. Calling
        # .get() right after each apply_async would block until that
        # batch finished, leaving all but one worker idle.
        pending_results = [
            pool.apply_async(analyze_segments, (file_content, segment_indices, segment_size))
            for segment_indices in segment_batches
        ]
        # Collect in submission order so merged counts (and the tie order
        # of the stable sort below) do not depend on worker scheduling.
        for batch_index, pending in enumerate(pending_results):
            logging.info(f"Analyzing batch {batch_index + 1}/{len(segment_batches)}")
            for candidate_hex, count in pending.get().items():
                potential_mcuids[candidate_hex] = potential_mcuids.get(candidate_hex, 0) + count

    sorted_candidates = sorted(potential_mcuids.items(), key=lambda item: item[1], reverse=True)
    return {
        "first_1000_chars_hex": hex_preview[:1000],
        "sorted_candidates": sorted_candidates,
        "total_segments": num_segments
    }

def main(input_file=None):
    """Analyse a flash dump and log a summary of the findings.

    Args:
        input_file: path of the file to analyse. When omitted (or
            ``None``) the original hard-coded dump path is used, so
            calling ``main()`` with no arguments behaves as before.

    The summary logged at INFO level consists of the hex preview of the
    file, the ten most frequent candidates and the total segment count.
    """
    if input_file is None:
        input_file = "/var/www/html/modification_carto/volkswagensimospcr2.1_bench_fullbackup_41048480128019348206001024fade00_20230407173439_int_flash.bin"

    results = analyze_file(input_file)

    logging.info("First 1000 characters of the file in hex:")
    logging.info(results["first_1000_chars_hex"])

    logging.info("Potential MCU IDs (sorted by frequency):")
    # Only the ten most frequent candidates are shown, for brevity.
    for candidate, frequency in results["sorted_candidates"][:10]:
        logging.info(f"Candidate MCUID: {candidate}, Frequency: {frequency}")

    logging.info(f"Total segments analyzed: {results['total_segments']}")

# Run the analysis only when this file is executed as a script. The guard is
# also required because analyze_file uses multiprocessing: with start methods
# that re-import the main module in worker processes (spawn/forkserver), an
# unguarded main() call would be executed again in every worker.
if __name__ == "__main__":
    main()