基础软件
conda install cutadapt, trimmomatic, samtools, hisat2, subread, deeptools -y
人类转录组上游分析
# 样本名称
sample_name=sample
# 线程
threads=4
# 双端测序原始fastq1和fastq2路径
fastq1_path=/path/${sample_name}_1.fq.gz
fastq2_path=/path/${sample_name}_2.fq.gz
# 双端测序clean fastq1和fastq2路径
clean_fastq1_path=/path/${sample_name}_clean_1.fq.gz
clean_fastq2_path=/path/${sample_name}_clean_2.fq.gz
# 双端测序clean filter fastq1和fastq2路径
filter_fastq_path=/path/${sample_name}_filter.fq.gz
filter_fastq1_path=/path/${sample_name}_filter_1.fq.gz
filter_fastq2_path=/path/${sample_name}_filter_2.fq.gz
# 输出SAM文件路径
sam_path=/path/${sample_name}.sam
# 输出BAM文件路径
bam_path=/path/${sample_name}.bam
bam_sorted_path=/path/${sample_name}.sorted.bam
# 输出结果文件
bam_sorted_bw_path=/path/${sample_name}.sorted.bam.bw
count_path=/path/${sample_name}.count
###########################################
# 去接头
cutadapt --pair-filter=any --minimum-length 15 --max-n 8 \
-a AGATCGGAAGAGCACACGTCTGAACTCCAGTCAC \
-A AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTAGATCTCGGTGGTCGCCGTATCATT \
-o $clean_fastq1_path -p $clean_fastq2_path \
$fastq1_path $fastq2_path
# 去除低质量碱基
trimmomatic PE -threads $threads -phred33 \
$clean_fastq1_path $clean_fastq2_path \
-baseout $filter_fastq_path AVGQUAL:20 SLIDINGWINDOW:4:15 MINLEN:15
# 比对到参考基因组
hisat2 --threads $threads -x /reference/hisat2/hg19 \
-1 $filter_fastq1_path -2 $filter_fastq1_path \
-S $sam_path
# SAM转换为BAM
samtools view -bS $sam_path -@ $threads| \
samtools sort $bam_path -o $bam_sorted_path -@ $threads
# 排序和索引BAM文件
samtools index $bam_path -@ $threads
# featureCounts
featureCounts -T 30 -t exon -g gene_id \
-a /reference/hg19/Homo_sapiens.GRCh37.75.gtf \
-o $count_path $bam_sorted_path
# 生成bamCoverage
bamCoverage --bam $bam_sorted_path -o $bam_sorted_bw_path \
--binSize 10 -p $threads
小鼠转录组上游分析
参考基因组和GTF注释文件替换为小鼠mm10。
# 样本名称
sample_name=sample
# 线程
threads=4
# 双端测序原始fastq1和fastq2路径
fastq1_path=/path/${sample_name}_1.fq.gz
fastq2_path=/path/${sample_name}_2.fq.gz
# 双端测序clean fastq1和fastq2路径
clean_fastq1_path=/path/${sample_name}_clean_1.fq.gz
clean_fastq2_path=/path/${sample_name}_clean_2.fq.gz
# 双端测序clean filter fastq1和fastq2路径
filter_fastq_path=/path/${sample_name}_filter.fq.gz
filter_fastq1_path=/path/${sample_name}_filter_1.fq.gz
filter_fastq2_path=/path/${sample_name}_filter_2.fq.gz
# 输出SAM文件路径
sam_path=/path/${sample_name}.sam
# 输出BAM文件路径
bam_path=/path/${sample_name}.bam
bam_sorted_path=/path/${sample_name}.sorted.bam
# 输出结果文件
bam_sorted_bw_path=/path/${sample_name}.sorted.bam.bw
count_path=/path/${sample_name}.count
###########################################
# 去接头
cutadapt --pair-filter=any --minimum-length 15 --max-n 8 \
-a AGATCGGAAGAGCACACGTCTGAACTCCAGTCAC \
-A AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTAGATCTCGGTGGTCGCCGTATCATT \
-o $clean_fastq1_path -p $clean_fastq2_path \
$fastq1_path $fastq2_path
# 去除低质量碱基
trimmomatic PE -threads $threads -phred33 \
$clean_fastq1_path $clean_fastq2_path \
-baseout $filter_fastq_path AVGQUAL:20 SLIDINGWINDOW:4:15 MINLEN:15
# 比对到参考基因组
hisat2 --threads $threads -x /reference/hisat2/mm10 \
-1 $filter_fastq1_path -2 $filter_fastq1_path \
-S $sam_path
# SAM转换为BAM
samtools view -bS $sam_path -@ $threads| \
samtools sort $bam_path -o $bam_sorted_path -@ $threads
# 排序和索引BAM文件
samtools index $bam_path -@ $threads
# featureCounts
featureCounts -T 30 -t exon -g gene_id \
-a /reference/mm10/Mus_musculus.GRCm38.102.gtf \
-o $count_path $bam_sorted_path
# 生成bamCoverage
bamCoverage --bam $bam_sorted_path -o $bam_sorted_bw_path \
--binSize 10 -p $threads