########################################################################
#
# RNA-seq sequencing project specific information
#
# NOTE: each setting must sit on its own line. If statements are joined
# onto a line that begins with '#', the shell reads the entire line as a
# comment and none of the assignments below take effect.
#
#######################################################################

# Per-step switches; the values used here are y and n
# (presumably y = run the step, n = skip it -- confirm against the pipeline).
RUN_STAR=y
RUN_FEATURECOUNTS=y
RUN_SNPCALL=y
RUN_RSEM=y
RUN_SALMON=n
RUN_KALLISTO=n

# !!!!!!
# Species: human, mouse, rat, etc.
SAMPLE_SPECIES=mouse

# !!!!!!
# Directory for FASTQ files. This parameter MUST be changed
# every time you analyze a new project/dataset.
FASTQ_DIR=/camhpc/ngs/projects/GEO/Stefka/TST10345_TST11079

# !!!!!!
# Stranded or nonstranded.
# 0: nonstranded; 1: forward strandness; 2: reverse strandness
# For Illumina true mRNA-seq, this parameter is usually set to 2
STRAND=0

# !!!!!!
# If you don't have an annotation, please comment out the next line.
# If you do, please make sure columns #1 and #2 correspond to
# Sample_ID and Subject_ID, respectively.
SAMPLE_ANNOTATION=sample.annotation.txt

# Suffix for fastq: fq.gz or fastq.gz or fastq or fq
FASTQ_SUFFIX=fastq.gz

# Select the algorithm for quantification: RSEM, KALLISTO, SALMON_ALN, SALMON
# You can set this parameter to ALL if you want to run all algorithms.
# SALMON_ALN is generally recommended. KALLISTO and RSEM also work great.
#
# Isoform quantification algorithm: leave exactly one of the following
# ISOFORM_ALGORITHM lines uncommented.
#ISOFORM_ALGORITHM=SALMON_ALN
#ISOFORM_ALGORITHM=ALL
ISOFORM_ALGORITHM=RSEM
#ISOFORM_ALGORITHM=KALLISTO
#ISOFORM_ALGORITHM=SALMON

# sequencing type: pair or single
SEQUENCE_TYPE=pair

# sequencing depth
# regular: 40-80M;
# deep: >80M, or encounter issues with STAR run due to BAM sorting
SEQUENCE_DEPTH=regular

# log directory (created here if it does not exist yet)
LOGDIR=/camhpc/ngs/projects/TST10345_TST11079/analysis/log
# Quoted so a path containing spaces or glob characters stays one argument.
mkdir -p "$LOGDIR"

#######################################################################
#
## Software specific parameters
## Usually, you DON'T need to modify them unless you understand the
## impacts of these parameters
#
#######################################################################

STAR_PARAMETER="--alignSJDBoverhangMin 1 --outFilterMismatchNoverLmax 0.05 --alignIntronMax 1000000"
FEATURECOUNTS_OVERLAP="--minOverlap 25"
VARSCAN_PARAMETER="--min-coverage 20 --min-reads2 4"
RSEM_PARAMETER="--seed 12345 --quiet --time --no-bam-output --bam"
SALMON_ALN_PARAMETER=" --incompatPrior 0 "
SALMON_PARAMETER=" --incompatPrior 0 --validateMappings "
KALLISTO_PARAMETER=" "

# change 100 to 150 for read length > 100
STAR_INDEX=/camhpc/ngs/genomes/mouse/gencode.vM16/STAR_100
RSEM_INDEX=/camhpc/ngs/genomes/mouse/gencode.vM16/rsem/rsem
GENOME_FASTA=/camhpc/ngs/genomes/mouse/gencode.vM16/fasta/GRCm38.primary.genome.fa
TRANSCRIPT_FASTA=/camhpc/ngs/genomes/mouse/gencode.vM16/fasta/gencode.vM16.transcripts.polish.fa
GTF_FILE=/camhpc/ngs/genomes/mouse/gencode.vM16/annotation/gencode.vM16.gtf
GENE_ANNOTATION=/camhpc/ngs/genomes/mouse/gencode.vM16/annotation/gencode.vM16.gene.annot
TRANSCRIPT_ANNOTATION=/camhpc/ngs/genomes/mouse/gencode.vM16/annotation/gencode.vM16.transcripts.annot

# HLA region for the most SNPs; it is organism specific
CHR_REGION=chr17:1-94987271

# tools
# Start from a clean module environment, then load pinned tool versions.
module purge
module load STAR/2.5.2a
module load rsem/1.2.26
module load subread/1.5.0-p1
module load bowtie/1.2.2
module load samtools/1.9
module load R/3.5.1
module load pandoc/2.1.1
module load varscan/2.4.2
# Location of the VarScan 2.4.2 jar.
# Kept as its own statement: written as a prefix on the same line as
# `export`, the assignment would only be temporary for that command.
VARSCAN_JAR=/camhpc/pkg/varscan/2.4.2/centos6/varscan.jar

# QuickIsoSeq pipeline directories, exported for child processes.
# Each export is a separate statement so that both variables are already
# assigned when PATH is built from them below.
export QuickIsoSeq=/camhpc/ngs/tools/pipelines/QuickIsoSeq
export SCRIPTPATH=/camhpc/ngs/tools/pipelines/QuickIsoSeq/QuickIsoSeq_html

# Prepend both pipeline directories to the command search path.
export PATH="${QuickIsoSeq}:${SCRIPTPATH}:${PATH}"