当前位置: 首页 > news >正文

蛋白多序列比对美化

1、用snapgene进行多序列比对,导出alin文件
2、用python进行多序列比对美化

点击查看代码
import html
import os

from Bio import AlignIO

# ====== User parameters ======
alignment_file = "比对.fa"  # input alignment file (fasta/clustal)
alignment_format = "fasta"
html_output = "msa_ruvc_all.html"

# Background gradient endpoints (blue scale encodes conservation),
# written as 6-digit hex RGB without a leading "#".
light_blue = "e6f3ff"
dark_blue = "08306b"

# RUVC1/2/3 definitions: per sequence id, a list of (start, end) ranges
# given as positions on the ungapped sequence, 1-based.
RUVC1 = {
    "FnCas12a_6I1K_1": [(892, 953)],
    "LbCas12a_5ID6_1": [(809, 858)],
    "LbCas12a_6NME_1": [(808, 872)],
    "Lb2Cas12a_8I54_1": [(792, 852)],
    "ReChb_Cas12a_1": [(853, 914)],
}
RUVC2 = {
    "FnCas12a_6I1K_1": [(971, 1078)],
    "LbCas12a_5ID6_1": [(890, 1011)],
    "LbCas12a_6NME_1": [(890, 997)],
    "Lb2Cas12a_8I54_1": [(869, 992)],
    "ReChb_Cas12a_1": [(930, 1044)],
}
RUVC3 = {
    "FnCas12a_6I1K_1": [(1254, 1300)],
    "LbCas12a_5ID6_1": [(1138, 1228)],
    "LbCas12a_6NME_1": [(1179, 1228)],
    "Lb2Cas12a_8I54_1": [(1151, 1206)],
    "ReChb_Cas12a_1": [(1215, 1261)],
}

# RUVC style
RUVC_color = "#FEBC28"  # one colour shared by every RUVC region
RUVC_italic = False  # whether RUVC residues are rendered in italics

# ====== Read alignment ======
alignment = AlignIO.read(alignment_file, alignment_format)
seq_len = alignment.get_alignment_length()


# Compute conservation
def _column_conservation(column):
    """Return the frequency of the most common non-gap character in *column*.

    Args:
        column: iterable of single characters, one per aligned sequence.

    Returns:
        A float in [0.0, 1.0]: count of the most frequent character divided
        by the number of non-gap characters, or 0.0 when the column holds
        only gaps ("-").
    """
    residues = [aa for aa in column if aa != "-"]
    if not residues:
        return 0.0
    return max(residues.count(aa) for aa in set(residues)) / len(residues)


# Stringify every record once so each column lookup is a plain str index
# rather than a repeated Seq indexing operation.
_rows = [str(rec.seq) for rec in alignment]
conservation = [
    _column_conservation([row[i] for row in _rows]) for i in range(seq_len)
]

# ====== Helper functions ======
def hex_to_rgb(hexstr):
    """Convert a 6-digit hex colour string into an ``(r, g, b)`` tuple.

    Args:
        hexstr: colour such as ``"e6f3ff"``; a leading ``"#"`` is accepted
            and ignored.

    Returns:
        Three ints parsed from the first, second and third pair of hex
        digits.

    Raises:
        ValueError: if any of the three digit pairs is not valid hex
            (including when the string is too short).
    """
    digits = hexstr.lstrip("#")
    return int(digits[0:2], 16), int(digits[2:4], 16), int(digits[4:6], 16)


def rgb_to_hex(r, g, b):
    """Format three ints as a lowercase hex string without a leading "#".

    Each component is zero-padded to at least two hex digits.
    """
    return f"{r:02x}{g:02x}{b:02x}"


# Gradient endpoints split into channels for linear interpolation.
lr, lg, lb = hex_to_rgb(light_blue)
dr, dg, db = hex_to_rgb(dark_blue)

# Map ungapped coordinates to alignment coordinates
def build_ruvc_aligned(ruvc_dict, alignment):
    """Translate 1-based ungapped ranges into alignment column ranges.

    Args:
        ruvc_dict: mapping of sequence id to a list of ``(start, end)``
            pairs, given as 1-based positions on the ungapped sequence.
        alignment: iterable of records exposing ``.id`` and ``.seq``.

    Returns:
        A dict mapping sequence id to a list of ``(start_col, end_col)``
        pairs, where both values are 0-based column indices into the gapped
        sequence. Ids missing from ``ruvc_dict``, and ids whose ranges all
        fall outside the ungapped sequence, are omitted.
    """
    result = {}
    for rec in alignment:
        seq_id = rec.id
        if seq_id not in ruvc_dict:
            continue

        # mapping[k] is the alignment column of the (k + 1)-th residue.
        mapping = [i for i, ch in enumerate(str(rec.seq)) if ch != "-"]
        ungapped_len = len(mapping)

        newranges = []
        for s, e in ruvc_dict[seq_id]:
            # Clamp to the valid 1-based window. Without the lower clamp a
            # start or end <= 0 would become a negative index and silently
            # wrap around to the end of the sequence.
            first = max(s, 1)
            last = min(e, ungapped_len)
            if first > last:
                continue
            newranges.append((mapping[first - 1], mapping[last - 1]))

        if newranges:
            result[seq_id] = newranges
    return result


RUVC1_aligned = build_ruvc_aligned(RUVC1, alignment)
RUVC2_aligned = build_ruvc_aligned(RUVC2, alignment)
RUVC3_aligned = build_ruvc_aligned(RUVC3, alignment)

# Check whether a position belongs to any RUVC
def in_ruvc(seq_id, pos, aa, ruvc_maps=None):
    """Tell whether alignment column *pos* of *seq_id* lies in a RUVC range.

    Args:
        seq_id: sequence id used as the key into each range table.
        pos: alignment column index to test.
        aa: character at that column; a gap ("-") is never in a range.
        ruvc_maps: optional iterable of ``{seq_id: [(start, end), ...]}``
            tables to search. Defaults to the module-level
            ``RUVC1_aligned``, ``RUVC2_aligned`` and ``RUVC3_aligned``.

    Returns:
        True if ``start <= pos <= end`` for any range of *seq_id* in any
        table, otherwise False.
    """
    if aa == "-":
        return False
    if ruvc_maps is None:
        # Resolved at call time so the tables only need to exist by then.
        ruvc_maps = (RUVC1_aligned, RUVC2_aligned, RUVC3_aligned)
    return any(
        start <= pos <= end
        for ruvc_map in ruvc_maps
        for start, end in ruvc_map.get(seq_id, ())
    )


# ====== Generate HTML ======
# The background colour depends only on the column's conservation, so it is
# computed once per column instead of once per residue of every record.
_column_bg = []
for cons in conservation:
    r = int(round(lr + (dr - lr) * cons))
    g = int(round(lg + (dg - lg) * cons))
    b = int(round(lb + (db - lb) * cons))
    _column_bg.append("#" + rgb_to_hex(r, g, b))

_ruvc_style = "font-style:italic;" if RUVC_italic else ""

with open(html_output, "w", encoding="utf-8") as out:
    # Static page header and CSS.
    out.write("<!doctype html><html lang='zh-CN'><head><meta charset='utf-8'>\n")
    out.write("<title>MSA - RUVC 高亮</title>\n")
    out.write("<style>\n")
    out.write("body{font-family: Consolas, monospace; padding:16px}\n")
    out.write("table { border-collapse: collapse; }\n")
    out.write("td.id { vertical-align: top; padding:4px 8px; white-space: nowrap; }\n")
    out.write("td.seq { vertical-align: top; padding:4px 8px; white-space: pre; }\n")
    out.write("span.res { display:inline-block; padding:0 1px; }\n")
    out.write("</style></head><body>\n")
    # The file name and record ids come from user data: escape them so they
    # cannot inject markup into the page.
    page_name = html.escape(os.path.basename(alignment_file))
    out.write(f"<h2>多序列比对(RUVC 高亮) — {page_name}</h2>\n")
    out.write("<div style='overflow-x:auto'><table>\n")

    for rec in alignment:
        seq_id = rec.id
        spans = []
        for i, aa in enumerate(str(rec.seq)):
            if aa == "-":
                # Gaps stay unstyled: white background, black text.
                bg, color, style_extra = "#ffffff", "#000000", ""
            else:
                bg = _column_bg[i]
                # RUVC annotation
                if in_ruvc(seq_id, i, aa):
                    color, style_extra = RUVC_color, _ruvc_style
                else:
                    color, style_extra = "#000000", ""
            spans.append(
                f"<span class='res' style='background-color:{bg};"
                f"color:{color};{style_extra}'>{html.escape(aa)}</span>"
            )
        # One write per row instead of one per residue.
        row_html = "".join(spans)
        out.write(
            f"<tr><td class='id'>{html.escape(seq_id)}</td>"
            f"<td class='seq'>{row_html}</td></tr>\n"
        )

    out.write("</table></div></body></html>\n")

print(f"已生成:{html_output}")
http://www.wxhsa.cn/company.asp?id=7641

相关文章:

  • Gitee推出Remote mcp-gitee:云端MCP服务开启智能协作新时代
  • Gitee DevOps平台:驱动中国企业数字化转型的核心引擎
  • 10 类多布局扫描图像数据集:支撑 OCR 精度提升与 VLM 微调,覆盖广告 / 简历 / 论文等场景的计算机视觉训练数据
  • 国产化Excel开发组件Spire.XLS教程:C# 轻松将 DataSet 导出到 Excel
  • Mysql:Docker的Mysql容器加载Levenshtein 距离算法脚本,实现“相似度匹配”
  • 树链剖分
  • 【2025-09-17】慢慢得到
  • Excel处理控件Aspose.Cells教程:如何使用Python在Excel中创建下拉列表
  • STM32的电子钟功能实现
  • kylin V11安装mysql8.0.41(glibc2.28)
  • __cpuid
  • Gitee崛起:国产代码托管平台如何重塑企业研发效能新格局
  • 字节SQL数据库开发手册
  • 完整教程:视频上传以及在线播放
  • C++ STL 常用算法
  • Gitee:中国开发者生态的成长引擎与数字化转型的加速器
  • 【IEEE出版|五邑大学主办|连续四年EI检索】第五届电子信息工程与计算机技术国际学术会议(EIECT 2025)
  • tightvnc使用记录
  • 高科战神全家软件怎么设置
  • 简单数论函数求和题目的一些技巧
  • 3519DV500 BT.1120 无法输出 59.94帧率
  • 独立做产品,做一个,还是做多个找爆款?
  • 第六届计算机工程与智能控制学术会议(ICCEIC 2025)
  • ARL(灯塔)安装步骤
  • c# grpc
  • win10任务栏频繁卡死、转圈
  • Typora Markdown 编辑快捷键大全(优化补充版)
  • 第二届数字经济与计算机科学国际学术会议(DECS 2025)
  • 文件摆渡系统案例分享:医院如何构建高效内外网文件交换通道
  • 淘天一面