#! /bin/sh
#!perl -w # --*- Perl -*--
eval 'exec perl -x $0 ${1+"$@"}'
    if 0;
#------------------------------------------------------------------------------
#$Author: saulius $
#$Date: 2022-04-06 10:12:29 +0300 (Wed, 06 Apr 2022) $
#$Revision: 9257 $
#$URL: svn+ssh://www.crystallography.net/home/coder/svn-repositories/cod-tools/tags/v3.8.1/scripts/cif2xyz $
#------------------------------------------------------------------------------
#*
#* Read CIF files and print atom coordinates in XYZ format.
#* The default output format is as described in
#* https://en.wikipedia.org/wiki/XYZ_file_format.
#*
#* USAGE:
#*    $0 --options input.cif
#**

use strict;
use warnings;
use COD::AtomProperties;
use COD::CIF::Data qw( get_cell );
use COD::CIF::Data::AtomList qw( get_atom_chemical_type );
use COD::CIF::Parser qw( parse_cif );
use COD::ErrorHandler qw( process_warnings
                          process_errors
                          process_parser_messages
                          report_message );
use COD::Fractional qw( symop_ortho_from_fract );
use COD::Spacegroups::Symop::Algebra qw( symop_vector_mul );
use COD::SOptions qw( getOptions );
use COD::SUsage qw( usage options );
use COD::ToolsVersion qw( get_version_string );

my $die_on_error_level = {
    ERROR   => 1,
    WARNING => 0,
    NOTE    => 0
};

my $atom_properties = \%COD::AtomProperties::atoms;

# Format to print out all floating point numbers -- lattice vector
# coordinates and atomic coordinates:
my $float_format = '%21.14e';

my $print_radii = 0;
my $add_xyz_header = 1;
my $use_parser = 'c'; # Used CIF parser

# Flags indicating which additional data elements should be printed in
# the comment line (the second line of the XYZ formatted block):
my $print_datablock_name = 1;
my $print_chemical_name = 0;
my $print_formula_sum = 0;
my $print_lattice = 1;

#* OPTIONS:
#*   -f, --float-format "%21.14e"
#*                   Specify format for floating point output.
#*                   For Perl, a usual "printf" format can be given.
#*   -H, --human-readable
#*                    Use format "%15.6f" for better human readability.
#*   -M, --machine-readable
#*                    Use format "%21.14e" to maintain precision. Default.
#*
#*   --print-radii
#*                     Append covalent radii to the atom coordinates.
#*   --no-print-radii, --dont-print-radii, --xyz-only
#*                     Print only Cartesian XYZ coordinates for input atoms.
#*                     Default.
#*
#*   --add-xyz-header
#*                     Add the total number of atoms on the first line of
#*                     the output, followed by an empty line. Default.
#*   --no-add-xyz-header,
#*   --do-not-add-xyz-header,
#*   --dont-add-xyz-header
#*                     Do not add the total number of atoms on the first line.
#*
#*   --print-lattice
#*                     Print LATTICE: keyword with the a, b, c lattice vectors
#*                     in the same orthogonal frame as the coordinates. Default.
#*   --no-print-lattice,
#*   --no-lattice,
#*   --do-not-print-lattice,
#*   --dont-print-lattice,
#*                     Do not print lattice information in the comment line.
#*
#*   --print-datablock-name
#*   --print-formula-sum
#*   --print-chemical-name
#*                     These options, and their corresponding negated
#*                     counterparts, control whether the corresponding data
#*                     item should be printed in the comment line.
#*
#*   --no-print-datablock-name, --do-not-print-datablock-name,
#*   --dont-print-datablock-name, --do-not-print-chemical-name,
#*   --no-print-chemical-name, --dont-print-chemical-name,
#*   --no-print-formula-sum, --do-not-print-formula-sum,
#*   --dont-print-formula-sum, --no-datablock-name,
#*   --no-chemical-name, --no-formula-sum
#*                     Switch off the corresponding print options.
#*
#*   --use-perl-parser
#*                     Use Perl parser to parse CIF files.
#*   --use-c-parser
#*                     Use C parser to parse CIF files. Default.
#*
#*   --help, --usage
#*                     Output a short usage message (this message) and exit.
#*   --version
#*                     Output version information and exit.
#**
@ARGV = getOptions(
    '--print-radii'                       => sub { $print_radii = 1 },
    '--no-print-radii,--dont-print-radii' => sub { $print_radii = 0 },
    '--xyz-only'                          => sub { $print_radii = 0 },

    '--add-xyz-header' => sub { $add_xyz_header = 1 },
    '--no-add-xyz-header,--do-not-add-xyz-header,--dont-add-xyz-header' =>
        sub { $add_xyz_header = 0 },

    '--print-datablock-name' => sub { $print_datablock_name = 1 },

    '--no-print-datablock-name,--dont-print-datablock-name,' .
    '--do-not-print-datablock-name,--no-datablock-name'
    => sub { $print_datablock_name = 0 },

    '--print-formula-sum' => sub { $print_formula_sum = 1 },

    '--no-print-formula-sum,--do-not-print-formula-sum,' .
    '--dont-print-formula-sum,--no-formula-sum'
    => sub { $print_formula_sum = 0 },

    '--print-chemical-name' => sub { $print_chemical_name = 1 },

    '--no-print-chemical-name,--do-not-print-chemical-name,' .
    '--dont-print-chemical-name,--no-chemical-name'
    => sub { $print_chemical_name = 0 },

    '--print-lattice' => sub { $print_lattice = 1 },

    '--no-print-lattice,--do-not-print-lattice,' .
    '--dont-print-lattice,--no-lattice'
    => sub { $print_lattice = 0 },

    '-f,--float-format' => \$float_format,
    '-H,--human-readable' => sub {
        $float_format = '%15.6f';
    },
    '-M,--machine-readable' => sub {
        $float_format = '%21.14e';
    },

    '--use-perl-parser' => sub { $use_parser = 'perl' },
    '--use-c-parser'    => sub { $use_parser = 'c' },

    '--options'      => sub { options; exit },
    '--help,--usage' => sub { usage; exit },
    '--version'      => sub { print get_version_string(), "\n"; exit }
);

@ARGV = ('-') unless @ARGV;

binmode STDOUT, ':encoding(UTF-8)';
binmode STDERR, ':encoding(UTF-8)';

my $atom_options;
if (!$print_radii) {
    $atom_options->{'allow_unknown_chemical_types'} = 1;
}
my $parser_options = { 'parser' => $use_parser, 'no_print' => 1 };
for my $filename (@ARGV) {
    my ( $data, $err_count, $messages ) = parse_cif( $filename, $parser_options );
    process_parser_messages( $messages, $die_on_error_level );

    if( !@{$data} || !defined $data->[0] || !defined $data->[0]{name} ) {
        report_message( {
           'program'   => $0,
           'filename'  => $filename,
           'err_level' => 'WARNING',
           'message'   => 'file seems to be empty'
        }, $die_on_error_level->{'WARNING'} );
        next;
    }

    for my $dataset (@{$data}) {
        my $dataname = 'data_' . $dataset->{'name'};

        local $SIG{__WARN__} = sub {
            process_warnings( {
                'message'       => @_,
                'program'       => $0,
                'filename'      => $filename,
                'add_pos'       => $dataname
            }, $die_on_error_level )
        };

        eval {
            local $, = ' ';
            local $\ = "\n";

            my $values = $dataset->{values};

            my @cell = get_cell( $values );

            my $f2o = symop_ortho_from_fract(@cell);

            my $atom_site_tag;
            if( exists $values->{_atom_site_label} ) {
                $atom_site_tag = '_atom_site_label';
            } elsif( exists $values->{_atom_site_type_symbol} ) {
                $atom_site_tag = '_atom_site_type_symbol';
            } else {
                die 'ERROR, neither \'_atom_site_label\' nor ' .
                    "'_atom_site_type_symbol' data items were found\n";
            }

            my $atom_labels = $values->{$atom_site_tag};

            if( $add_xyz_header ) {
                print scalar @{$atom_labels};
                my $separator = "";
                if( $print_datablock_name ) {
                    local $\ = '';
                    $separator = ' ';
                    print $dataset->{name};
                }
                if( $print_formula_sum ) {
                    local $\ = '';
                    print $separator;
                    $separator = ' ';
                    if( exists $values->{_chemical_formula_sum} ) {
                        my $formula = $values->{_chemical_formula_sum}[0];
                        $formula =~ s/\s//g;
                        print $formula;
                    } else {
                        print "''";
                    }
                }
                if( $print_chemical_name ) {
                    local $\ = '';
                    print $separator;
                    $separator = ' ';
                    if( exists $values->{_chemical_name_systematic} ) {
                        my $chemname = $values->{_chemical_name_systematic}[0];
                        $chemname =~ s/\n//g;
                        print $chemname;
                    } else {
                        print "''";
                    }
                }
                if( $print_lattice ) {
                    local $\ = '';
                    print $separator;
                    $separator = ' ';
                    print 'LATTICE: ';
                    # Transpose the @$f2o matrix for printing:
                    my @lattice = (
                        [$f2o->[0][0], $f2o->[1][0], $f2o->[2][0] ],
                        [$f2o->[0][1], $f2o->[1][1], $f2o->[2][1] ],
                        [$f2o->[0][2], $f2o->[1][2], $f2o->[2][2] ],
                        );
                    print join( ' ', map
                                {
                                    join(' ', map
                                         {sprintf($float_format,$_)}
                                         @$_)
                                }
                                @lattice );
                }
                print '';
            }

            for (my $i = 0; $i < @{$atom_labels}; $i++) {
                my $atom_type =
                        get_atom_chemical_type( $dataset, $i, $atom_options );
                my $covalent_radius =
                        $atom_properties->{$atom_type}{'covalent_radius'};

                my @atom_xyz = map { s/[(][0-9]+[)]$//; $_ }
                               map { $values->{"_atom_site_fract_$_"}[$i] }
                                   qw( x y z );
                my $coordinates_ortho = symop_vector_mul( $f2o, \@atom_xyz );

                printf '%-2s ' . ($float_format . ' ') x 3,
                    $atom_type, @{$coordinates_ortho};

                print( $print_radii ?
                       sprintf($float_format,$covalent_radius) : '' );
            }
        };
        if ($@) {
            process_errors( {
              'message'       => $@,
              'program'       => $0,
              'filename'      => $filename,
              'add_pos'       => $dataname
            }, $die_on_error_level->{'ERROR'} );
        }
    }
}
