SHOGUN  v1.1.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
List of all members | Public Member Functions | Static Public Member Functions | Static Public Attributes | Protected Member Functions | Protected Attributes
CAlphabet Class Reference

Detailed Description

The class Alphabet implements an alphabet and alphabet utility functions.

These utility functions can be used to remap characters to more (bit-)efficient representations, check if a string is valid, compute histograms etc.

Currently supported alphabets are DNA, RAWDNA, RNA, PROTEIN, BINARY, ALPHANUM, CUBE, RAW, IUPAC_NUCLEIC_ACID and IUPAC_AMINO_ACID.

Definition at line 88 of file Alphabet.h.

Inheritance diagram for CAlphabet:
Inheritance graph
[legend]

Public Member Functions

 CAlphabet ()
 CAlphabet (char *alpha, int32_t len)
 CAlphabet (EAlphabet alpha)
 CAlphabet (CAlphabet *alpha)
virtual ~CAlphabet ()
bool set_alphabet (EAlphabet alpha)
EAlphabet get_alphabet () const
int32_t get_num_symbols () const
int32_t get_num_bits () const
uint8_t remap_to_bin (uint8_t c)
uint8_t remap_to_char (uint8_t c)
void clear_histogram ()
 clear histogram
template<class T >
void add_string_to_histogram (T *p, int64_t len)
void add_byte_to_histogram (uint8_t p)
void print_histogram ()
 print histogram
SGVector< int64_t > get_histogram ()
bool check_alphabet (bool print_error=true)
bool is_valid (uint8_t c)
bool check_alphabet_size (bool print_error=true)
int32_t get_num_symbols_in_histogram ()
int32_t get_max_value_in_histogram ()
int32_t get_num_bits_in_histogram ()
virtual const char * get_name () const
template<>
void translate_from_single_order (float32_t *obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val, int32_t gap)
template<>
void translate_from_single_order (float64_t *obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val, int32_t gap)
template<>
void translate_from_single_order (floatmax_t *obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val, int32_t gap)
template<>
void translate_from_single_order_reversed (float32_t *obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val, int32_t gap)
template<>
void translate_from_single_order_reversed (float64_t *obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val, int32_t gap)
template<>
void translate_from_single_order_reversed (floatmax_t *obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val, int32_t gap)
- Public Member Functions inherited from CSGObject
 CSGObject ()
 CSGObject (const CSGObject &orig)
virtual ~CSGObject ()
virtual bool is_generic (EPrimitiveType *generic) const
template<class T >
void set_generic ()
void unset_generic ()
virtual void print_serializable (const char *prefix="")
virtual bool save_serializable (CSerializableFile *file, const char *prefix="")
virtual bool load_serializable (CSerializableFile *file, const char *prefix="")
void set_global_io (SGIO *io)
SGIO * get_global_io ()
void set_global_parallel (Parallel *parallel)
Parallel * get_global_parallel ()
void set_global_version (Version *version)
Version * get_global_version ()
SGVector< char * > get_modelsel_names ()
char * get_modsel_param_descr (const char *param_name)
index_t get_modsel_param_index (const char *param_name)

Static Public Member Functions

static const char * get_alphabet_name (EAlphabet alphabet)
template<class ST >
static void translate_from_single_order (ST *obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val)
template<class ST >
static void translate_from_single_order_reversed (ST *obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val)
template<class ST >
static void translate_from_single_order (ST *obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val, int32_t gap)
template<class ST >
static void translate_from_single_order_reversed (ST *obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val, int32_t gap)

Static Public Attributes

static const uint8_t B_A = 0
static const uint8_t B_C = 1
static const uint8_t B_G = 2
static const uint8_t B_T = 3
static const uint8_t B_0 = 4
static const uint8_t MAPTABLE_UNDEF = 0xff
static const char * alphabet_names [18]

Protected Member Functions

void init_map_table ()
void copy_histogram (CAlphabet *src)
virtual void load_serializable_post () throw (ShogunException)
- Protected Member Functions inherited from CSGObject
virtual void load_serializable_pre () throw (ShogunException)
virtual void save_serializable_pre () throw (ShogunException)
virtual void save_serializable_post () throw (ShogunException)

Protected Attributes

EAlphabet alphabet
int32_t num_symbols
int32_t num_bits
bool valid_chars [1<< (sizeof(uint8_t)*8)]
uint8_t maptable_to_bin [1<< (sizeof(uint8_t)*8)]
uint8_t maptable_to_char [1<< (sizeof(uint8_t)*8)]
int64_t histogram [1<< (sizeof(uint8_t)*8)]

Additional Inherited Members

- Public Attributes inherited from CSGObject
SGIO * io
Parallel * parallel
Version * version
Parameter * m_parameters
Parameter * m_model_selection_parameters

Constructor & Destructor Documentation

CAlphabet ( )

default constructor

Definition at line 34 of file Alphabet.cpp.

CAlphabet ( char *  alpha,
int32_t  len 
)

constructor

Parameters
alpha: alphabet to use
len: len

Definition at line 40 of file Alphabet.cpp.

CAlphabet ( EAlphabet  alpha)

constructor

Parameters
alpha: alphabet (type) to use

Definition at line 87 of file Alphabet.cpp.

CAlphabet ( CAlphabet *  alpha)

constructor

Parameters
alpha: alphabet to use

Definition at line 94 of file Alphabet.cpp.

~CAlphabet ( )
virtual

Definition at line 103 of file Alphabet.cpp.

Member Function Documentation

void add_byte_to_histogram ( uint8_t  p)

add element to histogram

Parameters
p: element

Definition at line 190 of file Alphabet.h.

void add_string_to_histogram ( T *  p,
int64_t  len 
)

make histogram for whole string

Parameters
p: string
len: length of string

Definition at line 180 of file Alphabet.h.

bool check_alphabet ( bool  print_error = true)

check whether the symbols in the histogram are valid in the alphabet, e.g. for DNA, whether only the letters ACGT appear

Parameters
print_error: if errors shall be printed
Returns
if symbols in histogram are valid in alphabet

Definition at line 600 of file Alphabet.cpp.

bool check_alphabet_size ( bool  print_error = true)

check whether symbols in histogram ALL fit in alphabet

Parameters
print_error: if errors shall be printed
Returns
if symbols in histogram ALL fit in alphabet

Definition at line 622 of file Alphabet.cpp.

void clear_histogram ( )

clear histogram

Definition at line 543 of file Alphabet.cpp.

void copy_histogram ( CAlphabet *  src)
protected

copy histogram

Parameters
src: alphabet to copy histogram from

Definition at line 639 of file Alphabet.cpp.

EAlphabet get_alphabet ( ) const

get alphabet

Returns
alphabet

Definition at line 127 of file Alphabet.h.

const char * get_alphabet_name ( EAlphabet  alphabet)
static

return alphabet name

Parameters
alphabet: alphabet type to get name from

Definition at line 652 of file Alphabet.cpp.

SGVector< int64_t > get_histogram ( )

get histogram

Returns
histogram

Definition at line 595 of file Alphabet.cpp.

int32_t get_max_value_in_histogram ( )

return maximum value in histogram

Returns
maximum value in histogram

Definition at line 549 of file Alphabet.cpp.

virtual const char* get_name ( ) const
virtual
Returns
object name

Implements CSGObject.

Definition at line 258 of file Alphabet.h.

int32_t get_num_bits ( ) const

get number of bits necessary to store all symbols in alphabet

Returns
number of necessary storage bits

Definition at line 146 of file Alphabet.h.

int32_t get_num_bits_in_histogram ( )

return number of bits required to store all symbols in histogram

Returns
number of bits required to store all symbols in histogram

Definition at line 576 of file Alphabet.cpp.

int32_t get_num_symbols ( ) const

get number of symbols in alphabet

Returns
number of symbols

Definition at line 136 of file Alphabet.h.

int32_t get_num_symbols_in_histogram ( )

return number of symbols in histogram

Returns
number of symbols in histogram

Definition at line 564 of file Alphabet.cpp.

void init_map_table ( )
protected

init map table

Definition at line 178 of file Alphabet.cpp.

bool is_valid ( uint8_t  c)

check whether a symbol is valid in the alphabet, e.g. for DNA, whether the symbol is one of A, C, G or T

Parameters
c: symbol
Returns
if symbol is a valid character in alphabet

Definition at line 218 of file Alphabet.h.

void load_serializable_post ( ) throw (ShogunException)
protected virtual

Can (optionally) be overridden to post-initialize some member variables which are not PARAMETER::ADD'ed. Make sure that the overridden method BASE_CLASS::LOAD_SERIALIZABLE_POST is called first.

Exceptions
ShogunException: Will be thrown if an error occurs.

Reimplemented from CSGObject.

Definition at line 732 of file Alphabet.cpp.

void print_histogram ( )

print histogram

Definition at line 586 of file Alphabet.cpp.

uint8_t remap_to_bin ( uint8_t  c)

remap element, e.g. translate ACGT to 0123

Parameters
c: element to remap
Returns
remapped element

Definition at line 156 of file Alphabet.h.

uint8_t remap_to_char ( uint8_t  c)

remap element, e.g. translate 0123 to ACGT

Parameters
c: element to remap
Returns
remapped element

Definition at line 166 of file Alphabet.h.

bool set_alphabet ( EAlphabet  alpha)

set alphabet and initialize mapping table (for remap)

Parameters
alpha: new alphabet

Definition at line 107 of file Alphabet.cpp.

void translate_from_single_order ( ST *  obs,
int32_t  sequence_length,
int32_t  start,
int32_t  p_order,
int32_t  max_val 
)
static

translate from single order

Parameters
obs: observation
sequence_length: length of sequence
start: start
p_order: order
max_val: maximum value

Definition at line 743 of file Alphabet.cpp.

void translate_from_single_order ( ST *  obs,
int32_t  sequence_length,
int32_t  start,
int32_t  p_order,
int32_t  max_val,
int32_t  gap 
)
static

translate from single order

Parameters
obs: observation
sequence_length: length of sequence
start: start
p_order: order
max_val: maximum value
gap: gap

Definition at line 819 of file Alphabet.cpp.

void translate_from_single_order ( float32_t *  obs,
int32_t  sequence_length,
int32_t  start,
int32_t  p_order,
int32_t  max_val,
int32_t  gap 
)

Definition at line 938 of file Alphabet.cpp.

void translate_from_single_order ( float64_t *  obs,
int32_t  sequence_length,
int32_t  start,
int32_t  p_order,
int32_t  max_val,
int32_t  gap 
)

Definition at line 942 of file Alphabet.cpp.

void translate_from_single_order ( floatmax_t *  obs,
int32_t  sequence_length,
int32_t  start,
int32_t  p_order,
int32_t  max_val,
int32_t  gap 
)

Definition at line 946 of file Alphabet.cpp.

void translate_from_single_order_reversed ( ST *  obs,
int32_t  sequence_length,
int32_t  start,
int32_t  p_order,
int32_t  max_val 
)
static

translate from single order reversed

Parameters
obs: observation
sequence_length: length of sequence
start: start
p_order: order
max_val: maximum value

Definition at line 781 of file Alphabet.cpp.

void translate_from_single_order_reversed ( ST *  obs,
int32_t  sequence_length,
int32_t  start,
int32_t  p_order,
int32_t  max_val,
int32_t  gap 
)
static

translate from single order reversed

Parameters
obs: observation
sequence_length: length of sequence
start: start
p_order: order
max_val: maximum value
gap: gap

Definition at line 881 of file Alphabet.cpp.

template void translate_from_single_order_reversed< float32_t > ( float32_t *  obs,
int32_t  sequence_length,
int32_t  start,
int32_t  p_order,
int32_t  max_val,
int32_t  gap 
)

Definition at line 950 of file Alphabet.cpp.

template void translate_from_single_order_reversed< float64_t > ( float64_t *  obs,
int32_t  sequence_length,
int32_t  start,
int32_t  p_order,
int32_t  max_val,
int32_t  gap 
)

Definition at line 954 of file Alphabet.cpp.

template void translate_from_single_order_reversed< floatmax_t > ( floatmax_t *  obs,
int32_t  sequence_length,
int32_t  start,
int32_t  p_order,
int32_t  max_val,
int32_t  gap 
)

Definition at line 958 of file Alphabet.cpp.

Member Data Documentation

EAlphabet alphabet
protected

alphabet

Definition at line 350 of file Alphabet.h.

const char * alphabet_names
static
Initial value:
{
"DNA","RAWDNA", "RNA", "PROTEIN", "BINARY", "ALPHANUM",
"CUBE", "RAW", "IUPAC_NUCLEIC_ACID", "IUPAC_AMINO_ACID",
"NONE", "DIGIT", "DIGIT2", "RAWDIGIT", "RAWDIGIT2", "UNKNOWN",
"SNP", "RAWSNP"}

alphabet names

Definition at line 335 of file Alphabet.h.

const uint8_t B_0 = 4
static

B_0

Definition at line 331 of file Alphabet.h.

const uint8_t B_A = 0
static

B_A

Definition at line 323 of file Alphabet.h.

const uint8_t B_C = 1
static

B_C

Definition at line 325 of file Alphabet.h.

const uint8_t B_G = 2
static

B_G

Definition at line 327 of file Alphabet.h.

const uint8_t B_T = 3
static

B_T

Definition at line 329 of file Alphabet.h.

int64_t histogram[1<< (sizeof(uint8_t)*8)]
protected

histogram

Definition at line 362 of file Alphabet.h.

uint8_t maptable_to_bin[1<< (sizeof(uint8_t)*8)]
protected

maptable to bin

Definition at line 358 of file Alphabet.h.

uint8_t maptable_to_char[1<< (sizeof(uint8_t)*8)]
protected

maptable to char

Definition at line 360 of file Alphabet.h.

const uint8_t MAPTABLE_UNDEF = 0xff
static

MAPTABLE UNDEF

Definition at line 333 of file Alphabet.h.

int32_t num_bits
protected

number of bits

Definition at line 354 of file Alphabet.h.

int32_t num_symbols
protected

number of symbols

Definition at line 352 of file Alphabet.h.

bool valid_chars[1<< (sizeof(uint8_t)*8)]
protected

valid chars

Definition at line 356 of file Alphabet.h.


The documentation for this class was generated from the following files:

SHOGUN Machine Learning Toolbox - Documentation