AlphaFold3 中templates模块的_find_template_in_pdb函数用于在 mmCIF 文件中查找与给定模板序列匹配的链,通过三步匹配策略确保找到最佳的模板链。
源代码:
def _find_template_in_pdb(
        template_chain_id: str,
        template_sequence: str,
        mmcif_object: mmcif_parsing.MmcifObject,
) -> Tuple[str, str, int]:
    """Tries to find the template chain in the given pdb file.
    This method tries the three following things in order:
        1. Tries if there is an exact match in both the chain ID and the sequence.
             If yes, the chain sequence is returned. Otherwise:
        2. Tries if there is an exact match only in the sequence.
             If yes, the chain sequence is returned. Otherwise:
        3. Tries if there is a fuzzy match (X = wildcard) in the sequence.
             If yes, the chain sequence is returned.
    If none of these succeed, a SequenceNotInTemplateError is thrown.
    Args:
        template_chain_id: The template chain ID.
        template_sequence: The template chain sequence.
        mmcif_object: The PDB object to search for the template in.
    Returns:
        A tuple with:
        * The chain sequence that was found to match the template in the PDB object.
        * The ID of the chain that is being returned.
        * The offset where the template sequence starts in the chain sequence.
    Raises:
        SequenceNotInTemplateError: If no match is found after the steps described
            above.
    """
    # Try if there is an exact match in both the chain ID and the (sub)sequence.
    pdb_id = mmcif_object.file_id
    chain_sequence = mmcif_object.chain_to_seqres.get(template_chain_id)
    if chain_sequence and (template_sequence in chain_sequence):
        # logging.info(
        #     "Found an exact template match %s_%s.", pdb_id, template_chain_id
        # )
        mapping_offset = chain_sequence.find(template_sequence)
        return chain_sequence, template_chain_id, mapping_offset
    # Try if there is an exact match in the (sub)sequence only.
    for chain_id, chain_sequence in mmcif_object.chain_to_seqres.items():
        if chain_sequence and (template_sequence in chain_sequence):
            # logging.info("Found a sequence-only match %s_%s.", pdb_id, chain_id)
            mapping_offset = chain_sequence.find(template_sequence)
            return chain_sequence, chain_id, mapping_offset
    # Return a chain sequence that fuzzy matches (X = wildcard) the template.
    # Make parenthes


















