#!/bin/sh
#
# Print sanity-check line counts for a flat file of "rs" records.
#
# Usage: SCRIPT FLAT_FILE
#
# Each count is printed on its own line under a heading that states how the
# numbers are expected to relate to each other; comparing them is left to the
# reader.
#
# Exit status: 0 when all counts were printed, 2 when FLAT_FILE is missing or
# is not a readable regular file.

set -e  # Exit immediately if a simple command exits with a non-zero status

# Validate the input up front.  Every count below is a pipeline ending in wc,
# so a grep failure (e.g. "No such file") is masked by wc's zero exit status
# and "set -e" never fires; without this guard a bad path would silently
# report 0 for every count and still exit successfully.
if [ "$#" -lt 1 ]; then
  printf 'Usage: %s FLAT_FILE\n' "${0##*/}" >&2
  exit 2
fi
input=$1
# The file is scanned several times, so it must be a re-readable regular file.
if [ ! -f "$input" ] || [ ! -r "$input" ]; then
  printf '%s: cannot read file: %s\n' "${0##*/}" "$input" >&2
  exit 2
fi

# $rs_line must be the same as in the filter3_ds_flat.sh script:
rs_line_fields2to7="\| human \| 9606 \| [^|]* \| [^|]* \| [^|]* \| [^|]*"
rs_line="^rs[^|]* $rs_line_fields2to7"
# A complete "rs" header line: $rs_line anchored at end of line.  Defined once
# so that the record count and the "snp" check cannot drift apart.
rs_record="${rs_line}\$"

# count_matches PATTERN
#   Print the number of lines in $input matching the extended regex PATTERN.
#   "grep | wc -l" is used instead of "grep -c" on purpose: grep -c exits with
#   status 1 when nothing matches, which would abort the script under "set -e".
count_matches() {
  grep -E -- "$1" "$input" | wc -l
}

echo "Number of records (all numbers should be the same):"
count_matches '^rs'
count_matches "$rs_record"
count_matches '^SNP \| alleles='
count_matches '^VAL \| validated='

echo "validated=YES vs validated=NO (the sum should be = nb of records):"
count_matches '^VAL \| validated=YES'
count_matches '^VAL \| validated=NO'

echo "Number of reported locations in reference genome (should be >= nb of records):"
count_matches '^CTG \| assembly=GRCh37\.p13 \| chr='

echo "Number of records NOT tagged with \"snp\" (should be <= nb of records):"
# Keep only complete "rs" header lines, then drop those containing "| snp |".
grep -E -- "$rs_record" "$input" | grep -v '| snp |' | wc -l

