#!/usr/bin/env perl
#
#   A software toolkit for the interconversion of standard data models for phenotypic data
#
#   This file is part of Convert::Pheno
#
#   Last Modified: May/10/2024
#
#   $VERSION taken from Convert::Pheno
#
#   Copyright (C) 2022-2025 Manuel Rueda - CNAG (manuel.rueda@cnag.eu)
#
#   License: Artistic License 2.0

package main;

use strict;
use warnings;
use autodie;
use feature      qw(say);
use Getopt::Long qw(:config no_ignore_case);
use Pod::Usage;
use Data::Dumper;
use Sys::Hostname;
use POSIX                          qw(strftime);
use File::Spec::Functions          qw(catdir catfile);
use File::ShareDir::ProjectDistDir qw(dist_dir);
use FindBin                        qw($Bin);
use lib "$Bin/../lib";
use Term::ANSIColor qw(:constants);
use Convert::Pheno;
use Convert::Pheno::IO::CSVHandler;

# Data::Dumper output is emitted with sorted hash keys
$Data::Dumper::Sortkeys = 1;

# Options that carry a default value
my $share_dir   = dist_dir('Convert-Pheno');
my $schema_file = catfile( $share_dir, 'schema', 'mapping.json' );
my $out_dir     = '.';
my ( $color, $stream, $ohdsi_db ) = ( 1, 0, 0 );

# Options without a default: they remain undefined (scalars) or empty
# (arrays) unless they are given on the command line
my (
    $in_pxf,                 $in_bff,
    $in_redcap,              $in_cdisc,
    $in_csv,                 $out_bff,
    $out_pxf,                $out_csv,
    $out_jsonf,              $out_jsonld,
    $help,                   $man,
    $mapping_file,           $max_lines_sql,
    $search,                 $text_similarity_method,
    $min_text_similarity_score, $debug,
    $verbose,                $sep,
    $exposures_file,         $sql2csv,
    $test,                   $redcap_dictionary,
    $path_to_ohdsi_db,       $print_hidden_labels,
    $self_validate_schema,   $overwrite,
    $username,               $log,
);
my ( @omop_files, @omop_tables );

# Reading arguments
my $options_ok = GetOptions(

    # Input files
    'ipxf=s'      => \$in_pxf,        # string
    'ibff=s'      => \$in_bff,        # string
    'iredcap=s'   => \$in_redcap,     # string
    'icdisc=s'    => \$in_cdisc,      # string
    'iomop=s{1,}' => \@omop_files,    # array (one or more values)
    'icsv=s'      => \$in_csv,        # string

    # Output files
    'obff=s'    => \$out_bff,         # string
    'opxf=s'    => \$out_pxf,         # string
    'ocsv=s'    => \$out_csv,         # string
    'ojsonf=s'  => \$out_jsonf,       # string
    'ojsonld=s' => \$out_jsonld,      # string
    'out-dir=s' => \$out_dir,         # string

    # Help
    'help|?' => \$help,               # flag
    'man'    => \$man,                # flag

    # Conversion settings
    'mapping-file=s'              => \$mapping_file,                 # string
    'max-lines-sql=i'             => \$max_lines_sql,                # integer
    'search=s'                    => \$search,                       # string
    'text-similarity-method=s'    => \$text_similarity_method,       # string
    'min-text-similarity-score=f' => \$min_text_similarity_score,    # floating
    'debug=i'                     => \$debug,                        # integer
    'verbose|v'                   => \$verbose,                      # flag
    'color!'                      => \$color,                        # negatable flag
    'separator|sep=s'             => \$sep,                          # string
    'schema-file=s'               => \$schema_file,                  # string
    'exposures-file=s'            => \$exposures_file,               # string
    'stream!'                     => \$stream,                       # negatable flag
    'sql2csv'                     => \$sql2csv,                      # flag
    'test'                        => \$test,                         # flag
    'ohdsi-db'                    => \$ohdsi_db,                     # flag
    'omop-tables=s{1,}'           => \@omop_tables,                  # array (one or more values)
    'redcap-dictionary|rcd=s'     => \$redcap_dictionary,            # string
    'path-to-ohdsi-db=s'          => \$path_to_ohdsi_db,             # string
    'print-hidden-labels|phl'     => \$print_hidden_labels,          # flag
    'self-validate-schema|svs'    => \$self_validate_schema,         # flag
    'O'                           => \$overwrite,                    # flag
    'username=s'                  => \$username,                     # string
    'log:s'                       => \$log,                          # string with optional value

    # Prints the version and leaves immediately
    'version|V' => sub { say "$0 Version $VERSION"; exit; }
);

# Wrong usage => brief usage message and exit status 2
pod2usage(2) unless $options_ok;

if ($help) {
    pod2usage(1);
}
if ($man) {
    pod2usage( -verbose => 2, -exitval => 0 );
}

# Validate certain CLI options
#
# Each entry pairs a 'condition' (a closure returning true when the
# command line is INVALID) with the 'message' shown to the user.
# The checks are evaluated in order and the first failing one stops
# the program through pod2usage (exit status 1).
# NB: The sequence of validations is important
my @validation_checks = (
    {
        # At least one of the input options must point to an existing file
        # (for -iomop only the first file is tested)
        condition => sub {
            !(     ( defined $in_pxf && -f $in_pxf )
                || ( defined $in_bff    && -f $in_bff )
                || ( defined $in_redcap && -f $in_redcap )
                || ( defined $in_cdisc  && -f $in_cdisc )
                || ( defined $in_csv    && -f $in_csv )
                || ( @omop_files        && -f $omop_files[0] ) );
        },
        message => "Please specify a valid input [-i input-type] <infile>\n",
    },
    {
        # The output directory must already exist
        condition => sub { !-d $out_dir },
        message   => "Please specify a valid directory for --out-dir\n",
    },
    {
        # REDCap and CDISC inputs need a REDCap data dictionary
        condition => sub { ( $in_redcap || $in_cdisc ) && !$redcap_dictionary },
        message   =>
          "Please specify a valid REDCap data dictionary --rcd <file>\n",
    },
    {
        # REDCap, CDISC and raw CSV inputs need a mapping file
        condition =>
          sub { ( $in_redcap || $in_cdisc || $in_csv ) && !$mapping_file },
        message =>
          "Please specify a valid mapping file --mapping-file <file>\n",
    },
    {
        # The first OMOP file name must contain a known extension
        condition =>
          sub { @omop_files && $omop_files[0] !~ m/\.(csv|sql|tsv)/i },
        message =>
          "Please specify a valid OMOP-CDM file(s) (e.g., *csv or .sql)\n",
    },
    {
        condition => sub { @omop_tables && !@omop_files },
        message   => "The flag <--omop-tables> is only valid with <-iomop>\n",
    },
    {
        condition => sub { $stream && $out_pxf },
        message   => "The flag <--stream> is only valid with <-obff>\n",
    },
    {
        condition => sub { $stream && $sql2csv },
        message   =>
"The flags <--stream> and <--sql2csv> are mutually exclusive.\nIf you are using <--stream> is because you are likely processing huge files and we don't want to duplicate them in your HDD\n",
    },
    {
        # With OMOP input, BFF/PXF output file names must contain '.json'
        condition => sub {
            @omop_files
              && ( ( defined $out_bff && $out_bff !~ m/\.json/i )
                || ( defined $out_pxf && $out_pxf !~ m/\.json/i ) );
        },
        message =>
"The flag <-iomop> only supports output files in <json|json.gz> format\n",
    },
    {
        condition => sub { !-f $schema_file },
        message   =>
"Please specify a valid schema for the mapping file --schema-file <file>\n",
    },
    {
        condition => sub { defined $path_to_ohdsi_db && !-d $path_to_ohdsi_db },
        message   =>
"Please specify a valid directory for --path-to-ohdsi-db <dir>\n",
    },
    {
        condition => sub { defined $exposures_file && !-f $exposures_file },
        message   => "Please specify a valid --exposures-file <file>\n",
    },
    {
        # CSV / flattened JSON / JSON-LD outputs are only produced from
        # BFF or PXF inputs
        condition => sub {
            ( $out_csv || $out_jsonf || $out_jsonld )
              && ( !$in_bff && !$in_pxf );
        },
        message =>
"Sorry, <--ocsv>, <--ojsonf> and <--ojsonld> are only compatible with <--ibff> or <--ipxf>\n",
    },
);

# Stop at the first check whose condition holds
foreach my $check (@validation_checks) {
    if ( $check->{condition}->() ) {
        pod2usage( -message => $check->{message}, -exitval => 1 );
    }
}

# <--no-color> switches the ANSI sequences off through the environment
# variable honoured by Term::ANSIColor
if ( !$color ) {
    $ENV{'ANSI_COLORS_DISABLED'} = 1;
}

# Input candidates, in order of precedence: [ type, file ]
my @input_candidates = (
    [ pxf    => $in_pxf ],
    [ bff    => $in_bff ],
    [ redcap => $in_redcap ],
    [ cdisc  => $in_cdisc ],
    [ csv    => $in_csv ],
);

# Output candidates, in order of precedence: [ type, file ]
my @output_candidates = (
    [ pxf    => $out_pxf ],
    [ bff    => $out_bff ],
    [ csv    => $out_csv ],
    [ jsonf  => $out_jsonf ],
    [ jsonld => $out_jsonld ],
);

# The first candidate holding a true value wins (undef if there is none)
my ($chosen_input)  = grep { $_->[1] } @input_candidates;
my ($chosen_output) = grep { $_->[1] } @output_candidates;

# Defining $out_file (named 'individuals.json' when no output was requested)
my $out_file = catfile( $out_dir,
    $chosen_output ? $chosen_output->[1] : 'individuals.json' );

# Defining $log_file
# NB: '||' (not '//') is used on purpose so that both an undefined and an
# empty $log end up with the default name
my $log_name = $log || 'convert-pheno-log.json';
my $log_file = catfile( $out_dir, $log_name );

# Defining method as <input-type>2<output-type>, 'bff' being the fallback
my $in_type =
    $chosen_input ? $chosen_input->[0]
  : @omop_files   ? 'omop'
  :                 'bff';
my $out_type = $chosen_output ? $chosen_output->[0] : 'bff';
my $method   = join '2', $in_type, $out_type;

# Define job id: epoch seconds followed by the last 5 digits of the
# (zero-padded) process id
my $padded_pid = substr( "00000$$", -5 );
my $id         = time . $padded_pid;

# Defining data as a hashref
my $data = {

    # Input / output
    in_textfile => 1,
    in_file     => ( $chosen_input ? $chosen_input->[1] : undef ),
    in_files    => \@omop_files,    # only for Omop
    out_dir     => $out_dir,
    out_file    => $out_file,
    method      => $method,
    sep         => $sep,

    # Auxiliary files
    redcap_dictionary => $redcap_dictionary,
    mapping_file      => $mapping_file,
    exposures_file    => $exposures_file,
    schema_file       => $schema_file,
    path_to_ohdsi_db  => $path_to_ohdsi_db,

    # Conversion settings
    search                    => $search,
    text_similarity_method    => $text_similarity_method,
    min_text_similarity_score => $min_text_similarity_score,
    ohdsi_db                  => $ohdsi_db,
    omop_tables               => \@omop_tables,
    sql2csv                   => $sql2csv,
    max_lines_sql             => $max_lines_sql,
    stream                    => $stream,
    self_validate_schema      => $self_validate_schema,
    print_hidden_labels       => $print_hidden_labels,
    username                  => $username,

    # Job bookkeeping
    id      => $id,
    test    => $test,
    debug   => $debug,
    log     => $log,
    verbose => $verbose
};
if ($debug) {
    print Dumper $data;
}

# Start printing to STDOUT
say BOLD CYAN program_header($VERSION), RESET if $verbose;

# Save log file if $log (before $data gets blessed)
if ( defined $log ) {
    write_log( $log_file, $data );
}

#############################
# START DATA TRANSFORMATION #
#############################

convert( $out_file, $data );

###########################
# END DATA TRANSFORMATION #
###########################

sub convert {

    # Runs the conversion named in $l_data->{method} and, when the method
    # hands back a data structure, writes it to $o_file.
    #   $o_file : path of the output file
    #   $l_data : hashref with the job parameters (see $data above)
    my ( $o_file, $l_data ) = @_;

    # A pre-existing $o_file is removed silently with -O; otherwise the
    # user is asked what to do
    if ( !$overwrite ) {
        ask_overwrite($o_file);
    }
    elsif ( -e $o_file ) {
        unlink($o_file);
    }

    # Start verbose
    print BOLD BLUE program_body($l_data), RESET if $verbose;

    # The converter object and the name of the method to be invoked on it
    my $converter   = Convert::Pheno->new($l_data);
    my $method_name = $l_data->{method};

    # With --stream, omop2bff and omop2pxf the return value of the method
    # is not collected here (original note: JSON is serialized by individual)
    my $writes_by_itself =
         $stream
      || $method_name eq 'omop2bff'
      || $method_name eq 'omop2pxf';

    my $result;
    if ($writes_by_itself) {
        say BOLD GREEN "Writing <$o_file> file\n", RESET if $verbose;
        $converter->$method_name;
    }
    else {
        $result = $converter->$method_name;
    }

    # Whatever came back is written as CSV (bff2csv|pxf2csv) or YAML/JSON
    if ($result) {
        say BOLD GREEN "Writing <$o_file> file\n", RESET if $verbose;

        my $wants_csv =
          $method_name eq 'bff2csv' || $method_name eq 'pxf2csv';

        if ($wants_csv) {
            my %csv_args = (
                sep      => $l_data->{sep} // ';',
                filepath => $o_file,
                headers  => get_headers($result),
                data     => $result,
            );
            write_csv( \%csv_args );
        }
        else {
            my %io_args = (
                filepath => $o_file,
                mode     => 'write',
                data     => $result,
            );
            io_yaml_or_json( \%io_args );
        }
    }

    # Finish
    print BOLD GREEN program_footer(), RESET if $verbose;
}

sub program_header {

    # Returns the banner (a 40-column box) printed in verbose mode.
    #   $version : version string shown inside the box
    my $version = shift;

    # The version line is centred at run time so that the right border
    # stays aligned whatever the length of $version (e.g., '0.28' or
    # '0.28_1'). With an odd number of spare columns the extra blank goes
    # to the right. A label wider than the box is printed unpadded.
    my $inner_width = 38;                      # 40 columns minus both borders
    my $label       = "Version: $version";
    my $spare       = $inner_width - length $label;
    $spare = 0 if $spare < 0;
    my $left_pad     = int( $spare / 2 );
    my $right_pad    = $spare - $left_pad;
    my $version_line = join '', '*', ' ' x $left_pad, $label,
      ' ' x $right_pad, '*';

    my $str = <<EOF;
****************************************
*  Phenotypic Data Model Convert Tool  *
*          - CONVERT-PHENO -           *
$version_line
*   (C) 2022-2025 Manuel Rueda, CNAG   *
*       The Artistic License 2.0       *
****************************************
EOF
    return $str;
}

sub program_footer {

    # Returns the closing message printed in verbose mode: the text
    # followed by one empty line
    my @footer_lines = ( 'All done!', '' );
    return join( "\n", @footer_lines ) . "\n";
}

sub program_body {

    # Returns the two-line message (method and input) printed in verbose mode.
    #   $l_data : hashref providing 'method' plus 'in_files' (arrayref, used
    #             when the method starts with 'omop') or 'in_file' (any other)
    my $l_data = shift;

    my $method_name = $l_data->{method};

    # OMOP jobs may read several files: they are shown comma-separated
    my $processed;
    if ( $method_name =~ m/^omop/ ) {
        $processed = join ',', @{ $l_data->{in_files} };
    }
    else {
        $processed = $l_data->{in_file};
    }

    return "==== METHOD: <$method_name> ====\n" . "Processing: <$processed>\n";
}

sub write_log {

    # Writes a log file holding information about the host/run ('info')
    # together with the job parameters ('data').
    #   $log  : path of the log file
    #   $data : hashref with the job parameters (its 'id' is copied to 'info')
    my ( $log, $data ) = @_;

    # Detecting the number of logical CPUs across different OSes
    my $os = $^O;
    my $ncpu_output =
        lc($os) eq 'darwin' || lc($os) eq 'freebsd' ? qx{sysctl -n hw.ncpu}
      : $os eq 'MSWin32' ? qx{wmic cpu get NumberOfLogicalProcessors}
      :                    qx{/usr/bin/nproc};

    # We keep the first integer found in the output:
    #  - the Windows command also prints the header
    #    "NumberOfLogicalProcessors", so the number has to be extracted
    #  - if the command could not be run (undef) or printed no number we
    #    fall back to 1, whatever the OS, instead of propagating undef
    my ($ncpuhost) = ( $ncpu_output // '' ) =~ /(\d+)/;
    $ncpuhost = 0 + ( $ncpuhost // 1 );    # coercing it to be a number

    my $info = {
        date     => ( strftime "%a %b %e %H:%M:%S %Y", localtime ),
        ncpuhost => $ncpuhost,
        hostname => hostname,
        id       => $data->{id},           # string
        version  => $VERSION,

        # First non-empty environment variable, else a placeholder
        user => $ENV{'LOGNAME'}
          || $ENV{'USER'}
          || $ENV{'USERNAME'}
          || 'dummy-user'
    };

    # Saving file (RESET avoids leaking the color to later output)
    say BOLD GREEN "Writing <$log> file\n", RESET if $verbose;
    io_yaml_or_json(
        {
            filepath => $log,
            mode     => 'write',
            data     => { info => $info, data => $data }
        }
    );
}

sub ask_overwrite {

    # Asks (on STDIN) whether an existing file may be overwritten.
    #   $filepath : path of the file about to be written
    # Returns silently if the file does not exist or once it has been
    # deleted after the answer 'Y'. Dies if the answer is 'n', if STDIN is
    # exhausted, if the deletion fails or after 5 invalid answers.
    my $filepath = shift;

    # Nothing to ask if the file does not exist
    return unless -f $filepath;

    my $max_attempts = 5;

    for my $attempt ( 1 .. $max_attempts ) {

        # RESET right after the prompt so that the color never leaks into
        # the answer, the messages below or the text of a 'die'
        print BOLD RED "<$filepath> exists. Overwrite? [Y/n]: ", RESET;
        my $input = <STDIN>;

        # undef means end-of-file (e.g., non-interactive run): asking
        # again cannot succeed, so we stop right away
        die
"No answer could be read from STDIN (use -O to overwrite). Operation aborted.\n"
          unless defined $input;
        chomp $input;

        if ( $input eq 'Y' ) {
            unlink($filepath)
              or die "Failed to delete <$filepath>: $!\n";
            return;    # Exit after successful deletion
        }
        elsif ( $input eq 'n' ) {
            die "Operation aborted by the user.\n";
        }

        say "Invalid input. Please enter 'Y' or 'n'.";
    }
    die "Too many invalid attempts. Operation aborted.\n";
}

=head1 NAME

convert-pheno - A script to interconvert common data models for phenotypic data

=head1 SYNOPSIS

 convert-pheno [-i input-type] <infile> [-o output-type] <outfile> [-options]

     Arguments:                       
       (input-type): 
             -ibff                    Beacon v2 Models ('individuals' JSON|YAML) file
             -iomop                   OMOP-CDM CSV files or PostgreSQL dump
             -ipxf                    Phenopacket v2 (JSON|YAML) file
             -iredcap (experimental)  REDCap (raw data) export CSV file
             -icdisc  (experimental)  CDISC-ODM v1 XML file
             -icsv    (experimental)  Raw data CSV

             (Wish-list)
             #-iopenehr               openEHR
             #-ifhir                  HL7/FHIR

       (output-type):
             -obff                    Beacon v2 Models ('individuals' JSON|YAML) file
             -opxf                    Phenopacket v2 (JSON|YAML) file

             (Wish-list)
             #-oomop                  OMOP-CDM PostgreSQL dump

             Compatible with -i(bff|pxf):
             -ocsv                    Flatten data to CSV
             -ojsonf                  Flatten data to 1D-JSON (or 1D-YAML if suffix is .yml|.yaml)
             -ojsonld (experimental)  JSON-LD (interoperable w/ RDF ecosystem; YAML-LD if suffix is .ymlld|.yamlld)

     Options:
       -exposures-file <file>         CSV file with a list of 'concept_id' considered to be exposures (with -iomop)
       -mapping-file <file>           Fields mapping YAML (or JSON) file
       -max-lines-sql <number>        Maximum lines read per table from SQL dump [500]
       -min-text-similarity-score <score> Minimum score for cosine similarity (or Sorensen-Dice coefficient) [0.8] (to be used with --search mixed)
       -ohdsi-db                      Use Athena-OHDSI database (~2.2GB) with -iomop
       -omop-tables <tables>          OMOP-CDM tables to be processed. Tables <CONCEPT> and <PERSON> are always included.
       -out-dir <directory>           Output (existing) directory
       -O                             Overwrite output file
       -path-to-ohdsi-db <directory>  Directory for the file <ohdsi.db>
       -phl|print-hidden-labels       Print original values (before DB mapping) of text fields <_labels>
       -rcd|redcap-dictionary <file>  REDCap data dictionary CSV file
       -schema-file <file>            Alternative JSON Schema for mapping file
       -search <type>                 Type of search [>exact|mixed]
       -svs|self-validate-schema      Perform a self-validation of the JSON schema that defines mapping (requires IO::Socket::SSL)
       -sep|separator <char>          Delimiter character for CSV files [;] e.g., --sep $'\t'
       -stream                        Enable incremental processing with -iomop and -obff [>no-stream|stream]
       -sql2csv                       Print SQL TABLES (only valid with -iomop). Mutually exclusive with --stream
       -test                          Does not print time-changing-events (useful for file-based cmp)
       -text-similarity-method <method> The method used to compare values to DB [>cosine|dice]
       -u|username <username>         Set the username

     Generic Options:
       -debug <level>                 Print debugging level (from 1 to 5, being 5 max)
       -help                          Brief help message
       -log                           Save log file (JSON). If no argument is given then the log is named [convert-pheno-log.json]
       -man                           Full documentation
       -no-color                      Don't print colors to STDOUT [>color|no-color]
       -v|verbose                     Verbosity on
       -V|version                     Print Version

=head1 DESCRIPTION

C<convert-pheno> is a command-line front-end to the CPAN's module L<Convert::Pheno>.

=head1 SUMMARY

C<convert-pheno> is a command-line front-end to the CPAN's module L<Convert::Pheno> to interconvert common data models for phenotypic data.

=head1 INSTALLATION

If you plan to only use the CLI, we recommend installing it via CPAN. See details below.

=head2 Non containerized

The script runs on command-line Linux and it has been tested on Debian/RedHat/MacOS based distributions (only showing commands for Debian's). Perl 5 is installed by default on Linux, 
but we will install a few CPAN modules with C<cpanminus>.

=head3 Method 1: From CPAN

First install system level dependencies:

  sudo apt-get install cpanminus libbz2-dev zlib1g-dev libperl-dev libssl-dev

Now you have to choose one of the 3 options below:

B<Option 1:> System-level installation:

  cpanm --notest --sudo Convert::Pheno
  convert-pheno -h

B<Option 2:> Install Convert-Pheno and the dependencies at C<~/perl5>

  cpanm --local-lib=~/perl5 local::lib && eval $(perl -I ~/perl5/lib/perl5/ -Mlocal::lib)
  cpanm --notest Convert::Pheno
  convert-pheno --help

To ensure Perl recognizes your local modules every time you start a new terminal, you should type:

  echo 'eval $(perl -I ~/perl5/lib/perl5/ -Mlocal::lib)' >> ~/.bashrc

B<Option 3:> Install Convert-Pheno and the dependencies in a "virtual environment" (at C<local/>) . We'll be using the module C<Carton> for that:

  mkdir local
  cpanm --notest --local-lib=local/ Carton
  echo "requires 'Convert::Pheno';" > cpanfile
  export PATH=$PATH:local/bin; export PERL5LIB=$(pwd)/local/lib/perl5:$PERL5LIB
  carton install
  carton exec -- convert-pheno -help

=head3 Method 2: From CPAN in a Conda environment

Please follow L<these instructions|https://cnag-biomedical-informatics.github.io/convert-pheno/download-and-installation/#__tabbed_1_2>.

=head3 Method 3: From Github

  git clone https://github.com/cnag-biomedical-informatics/convert-pheno.git
  cd convert-pheno

Install system level dependencies:
  
  sudo apt-get install cpanminus libbz2-dev zlib1g-dev libperl-dev libssl-dev

Now you have to choose one of the 3 options below:

B<Option 1:> Install dependencies (they're harmless to your system) as C<sudo>:

  cpanm --notest --sudo --installdeps .
  bin/convert-pheno --help            

B<Option 2:> Install the dependencies at C<~/perl5>:

  cpanm --local-lib=~/perl5 local::lib && eval $(perl -I ~/perl5/lib/perl5/ -Mlocal::lib)
  cpanm --notest --installdeps .
  bin/convert-pheno --help

To ensure Perl recognizes your local modules every time you start a new terminal, you should type:

  echo 'eval $(perl -I ~/perl5/lib/perl5/ -Mlocal::lib)' >> ~/.bashrc

B<Option 3:> Install the dependencies in a "virtual environment" (at C<local/>) . We'll be using the module C<Carton> for that:

  mkdir local
  cpanm --notest --local-lib=local/ Carton
  export PATH=$PATH:local/bin; export PERL5LIB=$(pwd)/local/lib/perl5:$PERL5LIB
  carton install
  carton exec -- bin/convert-pheno -help

=head2 Containerized

=head3 Method 4: From Docker Hub

Download the latest version of the Docker image (supports both amd64 and arm64 architectures) from L<Docker Hub|https://hub.docker.com/r/manuelrueda/convert-pheno> by executing:

  docker pull manuelrueda/convert-pheno:latest
  docker image tag manuelrueda/convert-pheno:latest cnag/convert-pheno:latest

See additional instructions below.

=head3 Method 5: With Dockerfile

Please download the C<Dockerfile> from the repo:

  wget https://raw.githubusercontent.com/cnag-biomedical-informatics/convert-pheno/main/Dockerfile

And then run:

  docker buildx build -t cnag/convert-pheno:latest .

=head3 Additional instructions for Methods 4 and 5

To run the container (detached) execute:

  docker run -tid -e USERNAME=root --name convert-pheno cnag/convert-pheno:latest

To enter:

  docker exec -ti convert-pheno bash

The command-line executable can be found at:

  /usr/share/convert-pheno/bin/convert-pheno

The default container user is C<root> but you can also run the container as C<$UID=1000> (C<dockeruser>). 

  docker run --user 1000 -tid --name convert-pheno cnag/convert-pheno:latest
 
Alternatively, you can use C<make> to perform all the previous steps:

  wget https://raw.githubusercontent.com/cnag-biomedical-informatics/convert-pheno/main/Dockerfile
  wget https://raw.githubusercontent.com/cnag-biomedical-informatics/convert-pheno/main/makefile.docker
  make -f makefile.docker install
  make -f makefile.docker run
  make -f makefile.docker enter

=head3 Mounting volumes

Docker containers are fully isolated. If you need to mount a volume to the container please use the following syntax (C<-v host:container>). 
Find an example below (note that you need to change the paths to match yours):

  docker run -tid --volume /media/mrueda/4TBT/data:/data --name convert-pheno-mount cnag/convert-pheno:latest

Then I will do something like this:

  # First I create an alias to simplify invocation (from the host)
  alias convert-pheno='docker exec -ti convert-pheno-mount /usr/share/convert-pheno/bin/convert-pheno'

  # Now I use the alias to run the command (note that I use the flag --out-dir to specify the output directory)
  convert-pheno -ibff /data/individuals.json -opxf pxf.json --out-dir /data

=head3 System requirements

  * Ideally a Debian-based distribution (Ubuntu or Mint), but any other (e.g., CentOs, OpenSuse, MacOS) should do as well.
    (It should also work on macOS and Windows Server, but we are only providing information for Linux here)
  * Perl 5 (>= 5.26 core; installed by default in most Linux distributions). Check the version with "perl -v".
  * >= 4GB of RAM
  * 1 core
  * At least 16GB HDD

=head1 HOW TO RUN CONVERT-PHENO

For executing convert-pheno you will need:

=over

=item Input file(s):
      
A text file in one of the accepted formats. With C<--iomop> I/O files can be gzipped.

=item Optional: 

Athena-OHDSI database

The database file is available at this L<link|https://drive.google.com/drive/folders/1-5Ywf-hhwb8bX1sRNV2Tf3EjH4TCaC8P?usp=sharing> (~2.2GB). The database may be needed when using C<-iomop>.

Regardless if you're using the containerized or non-containerized version, the download procedure is the same. For CLI users, Google makes it difficult to use C<wget>, C<curl> or C<aria2c> so we will use a C<Python> module instead:

 $ pip install gdown

And then run the following script

 import gdown

 url = 'https://drive.google.com/uc?export=download&id=1-Ls1nmgxp-iW-8LkRIuNNdNytXa8kgNw'
 output = './ohdsi.db'
 gdown.download(url, output, quiet=False)

Once downloaded, you have two options:

a) Move the file C<ohdsi.db> inside the C<share/db/> directory.

or

b) Use the option C<--path-to-ohdsi-db>

=back

B<Examples:>

 $ bin/convert-pheno -ipxf phenopackets.json -obff individuals.json

 $ $path/convert-pheno -ibff individuals.json -opxf phenopackets.yaml --out-dir my_out_dir 

 $ $path/convert-pheno -iredcap redcap.csv -opxf phenopackets.json --redcap-dictionary redcap_dict.csv --mapping-file mapping_file.yaml

 $ $path/convert-pheno -iomop dump.sql -obff individuals.json

 $ $path/convert-pheno -iomop dump.sql.gz -obff individuals.json.gz --stream -omop-tables measurement -verbose

 $ $path/convert-pheno -icdisc cdisc_odm.xml -obff individuals.json --rcd redcap_dict.csv --mapping-file mapping_file.yaml --search mixed --min-text-similarity-score 0.6

 $ $path/convert-pheno -iomop *csv -obff individuals.json -sep ','

 $ carton exec -- $path/convert-pheno -ibff individuals.json -opxf phenopackets.json # If using Carton

=head2 COMMON ERRORS AND SOLUTIONS

 * Error message: CSV_XS ERROR: 2023 - EIQ - QUO character not allowed @ rec 1 pos 21 field 1
   Solution: Make sure you use the right character separator for your data with --sep <char>. 
             The script tries to guess it from the file extension, but sometimes extension and actual separator do not match. 
             When using REDCap as input, make sure that <--iredcap> and <--rcd> files use the same separator field.
             The default value for the separator is ';'. 
   Example for tab separator in CLI.
    --sep  $'\t' 

 * Error message: Foo
   Solution: Bar

=head1 CITATION

The author requests that any published work that utilizes C<Convert-Pheno> includes a citation to the following reference:

Rueda, M et al., (2024). Convert-Pheno: A software toolkit for the interconversion of standard data models for phenotypic data. Journal of Biomedical Informatics. L<DOI|https://doi.org/10.1016/j.jbi.2023.104558>

=head1 AUTHOR 

Written by Manuel Rueda, PhD. Info about CNAG can be found at L<https://www.cnag.eu>.

=head1 COPYRIGHT AND LICENSE

Copyright (C) 2022-2025, Manuel Rueda - CNAG.

This program is free software, you can redistribute it and/or modify it under the terms of the L<Artistic License version 2.0|perlartistic>.

=cut
