(git:0de0cc2)
topology_cif.F
Go to the documentation of this file.
1 !--------------------------------------------------------------------------------------------------!
2 ! CP2K: A general program to perform molecular dynamics simulations !
3 ! Copyright 2000-2024 CP2K developers group <https://cp2k.org> !
4 ! !
5 ! SPDX-License-Identifier: GPL-2.0-or-later !
6 !--------------------------------------------------------------------------------------------------!
7 
8 ! **************************************************************************************************
9 !> \brief Handles CIF (Crystallographic Information File) files
10 !> \author Teodoro Laino [tlaino]
11 !> \date 12.2008
12 ! **************************************************************************************************
14  USE cell_methods, ONLY: cell_create,&
17  USE cell_types, ONLY: cell_release,&
18  cell_type,&
19  pbc,&
23  cp_logger_type,&
24  cp_to_string
28  parser_get_object,&
30  USE cp_parser_types, ONLY: cp_parser_type,&
33  USE fparser, ONLY: evalf,&
34  finalizef,&
35  initf,&
36  parsef
38  section_vals_type
39  USE kinds, ONLY: default_string_length,&
40  dp
41  USE memory_utilities, ONLY: reallocate
42  USE message_passing, ONLY: mp_para_env_type
43  USE string_table, ONLY: id2str,&
44  s2s,&
45  str2id
46  USE string_utilities, ONLY: s2a
47  USE topology_types, ONLY: atom_info_type,&
49 #include "./base/base_uses.f90"
50 
51  IMPLICIT NONE
52 
53  CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'topology_cif'
54 
55  PRIVATE
56  PUBLIC :: read_coordinate_cif
57 
58 CONTAINS
59 
60 ! **************************************************************************************************
61 !> \brief Performs the real task of reading the proper information from the CIF
62 !> file
63 !> \param topology ...
64 !> \param para_env ...
65 !> \param subsys_section ...
66 !> \date 12.2008
67 !> \par Format Information implemented:
68 !> _chemical_name
69 !> _chemical_formula_sum
70 !> _cell_length_a
71 !> _cell_length_b
72 !> _cell_length_c
73 !> _cell_angle_alpha
74 !> _cell_angle_beta
75 !> _cell_angle_gamma
76 !> _symmetry_space_group_name_h-m
77 !> _symmetry_equiv_pos_as_xyz
78 !> _space_group_symop_operation_xyz
79 !> _atom_site_label
80 !> _atom_site_type_symbol
81 !> _atom_site_fract_x
82 !> _atom_site_fract_y
83 !> _atom_site_fract_z
84 !>
85 !> \author Teodoro Laino [tlaino]
86 ! **************************************************************************************************
87  SUBROUTINE read_coordinate_cif(topology, para_env, subsys_section)
89  TYPE(mp_para_env_type), POINTER :: para_env
90  TYPE(section_vals_type), POINTER :: subsys_section
91 
92  CHARACTER(len=*), PARAMETER :: routinen = 'read_coordinate_cif'
93  INTEGER, PARAMETER :: nblock = 1000
94  REAL(kind=dp), PARAMETER :: threshold = 1.0e-3_dp
95 
96  CHARACTER(LEN=1) :: sep
97  CHARACTER(LEN=default_string_length) :: s_tag, strtmp
98  INTEGER :: field_kind, field_label, field_symbol, field_x, field_y, field_z, handle, ii, &
99  iln0, iln1, iln2, iln3, isym, iw, jj, natom, natom_orig, newsize, num_fields
100  LOGICAL :: check, found, my_end
101  REAL(kind=dp) :: pfactor
102  REAL(kind=dp), DIMENSION(3) :: r, r1, r2, s, s_tmp
103  TYPE(atom_info_type), POINTER :: atom_info
104  TYPE(cell_type), POINTER :: cell
105  TYPE(cp_logger_type), POINTER :: logger
106  TYPE(cp_parser_type) :: parser
107 
108  NULLIFY (logger)
109  logger => cp_get_default_logger()
110  iw = cp_print_key_unit_nr(logger, subsys_section, "PRINT%TOPOLOGY_INFO/CIF_INFO", &
111  extension=".subsysLog")
112  CALL timeset(routinen, handle)
113  pfactor = section_get_rval(subsys_section, "TOPOLOGY%MEMORY_PROGRESSION_FACTOR")
114 
115  ! Element is assigned on the basis of the atm_name
116  topology%aa_element = .true.
117 
118  atom_info => topology%atom_info
119  CALL reallocate(atom_info%id_molname, 1, nblock)
120  CALL reallocate(atom_info%id_resname, 1, nblock)
121  CALL reallocate(atom_info%resid, 1, nblock)
122  CALL reallocate(atom_info%id_atmname, 1, nblock)
123  CALL reallocate(atom_info%r, 1, 3, 1, nblock)
124  CALL reallocate(atom_info%atm_mass, 1, nblock)
125  CALL reallocate(atom_info%atm_charge, 1, nblock)
126  CALL reallocate(atom_info%occup, 1, nblock)
127  CALL reallocate(atom_info%beta, 1, nblock)
128  CALL reallocate(atom_info%id_element, 1, nblock)
129 
130  IF (iw > 0) WRITE (iw, "(/,A,A)") " Reading in CIF file ", trim(topology%coord_file_name)
131 
132  ! Create cell
133  NULLIFY (cell)
134  CALL cell_create(cell, tag="CELL_CIF")
135  CALL read_cell_cif(topology%coord_file_name, cell, para_env)
136  CALL write_cell(cell, subsys_section)
137 
138  CALL parser_create(parser, topology%coord_file_name, &
139  para_env=para_env, apply_preprocessing=.false.)
140 
141  ! Check for _chemical_name
142  CALL parser_search_string(parser, "_chemical_name", ignore_case=.false., found=found, &
143  begin_line=.false., search_from_begin_of_file=.true.)
144  IF (found) THEN
145  IF (iw > 0) WRITE (iw, '(/,A)') " CIF_INFO| _chemical_name :: "//trim(parser%input_line(parser%icol:))
146  END IF
147 
148  ! Check for _chemical_formula_sum
149  CALL parser_search_string(parser, "_chemical_formula_sum", ignore_case=.false., found=found, &
150  begin_line=.false., search_from_begin_of_file=.true.)
151  IF (found) THEN
152  IF (iw > 0) WRITE (iw, '(A)') " CIF_INFO| _chemical_formula_sum :: "//trim(parser%input_line(parser%icol:))
153  END IF
154 
155  ! Parse atoms info and fractional coordinates
156  num_fields = 0
157  field_label = -1; field_symbol = -1; field_x = -1; field_y = -1; field_z = -1
158  CALL parser_search_string(parser, "_atom_site_", ignore_case=.false., found=found, &
159  begin_line=.false., search_from_begin_of_file=.true.)
160  DO WHILE (index(parser%input_line, "_atom_site_") /= 0)
161  num_fields = num_fields + 1
162  IF (index(parser%input_line, "_atom_site_label") /= 0) field_label = num_fields
163  IF (index(parser%input_line, "_atom_site_type_symbol") /= 0) field_symbol = num_fields
164  IF (index(parser%input_line, "_atom_site_fract_x") /= 0) field_x = num_fields
165  IF (index(parser%input_line, "_atom_site_fract_y") /= 0) field_y = num_fields
166  IF (index(parser%input_line, "_atom_site_fract_z") /= 0) field_z = num_fields
167  CALL parser_get_next_line(parser, 1)
168  END DO
169 
170  ! Check for missing fields.
171  IF (field_label < 0) cpabort("Field _atom_site_label not found in CIF file.")
172  IF (field_x < 0) cpabort("Field _atom_site_fract_x not found in CIF file.")
173  IF (field_y < 0) cpabort("Field _atom_site_fract_y not found in CIF file.")
174  IF (field_z < 0) cpabort("Field _atom_site_fract_z not found in CIF file.")
175 
176  ! Read atom kind from the symbol field if it's present, otherwise fall back to the label field.
177  IF (field_symbol > 0) THEN
178  field_kind = field_symbol
179  ELSE
180  field_kind = field_label
181  END IF
182 
183  ! Parse real info
184  natom = 0
185  DO WHILE ((index(parser%input_line, "loop_") == 0) .AND. (parser%input_line(1:1) /= "_"))
186  natom = natom + 1
187  ! Resize in case needed
188  IF (natom > SIZE(atom_info%id_molname)) THEN
189  newsize = int(pfactor*natom)
190  CALL reallocate(atom_info%id_molname, 1, newsize)
191  CALL reallocate(atom_info%id_resname, 1, newsize)
192  CALL reallocate(atom_info%resid, 1, newsize)
193  CALL reallocate(atom_info%id_atmname, 1, newsize)
194  CALL reallocate(atom_info%r, 1, 3, 1, newsize)
195  CALL reallocate(atom_info%atm_mass, 1, newsize)
196  CALL reallocate(atom_info%atm_charge, 1, newsize)
197  CALL reallocate(atom_info%occup, 1, newsize)
198  CALL reallocate(atom_info%beta, 1, newsize)
199  CALL reallocate(atom_info%id_element, 1, newsize)
200  END IF
201  DO ii = 1, num_fields
202  IF (ii == field_kind) THEN
203  CALL parser_get_object(parser, strtmp)
204  atom_info%id_atmname(natom) = str2id(strtmp)
205  atom_info%id_molname(natom) = str2id(s2s("MOL"//trim(adjustl(cp_to_string(natom)))))
206  atom_info%id_resname(natom) = atom_info%id_molname(natom)
207  atom_info%resid(natom) = 1
208  atom_info%id_element(natom) = atom_info%id_atmname(natom)
209  ELSE IF (ii == field_x) THEN
210  CALL cif_get_real(parser, atom_info%r(1, natom))
211  ELSE IF (ii == field_y) THEN
212  CALL cif_get_real(parser, atom_info%r(2, natom))
213  ELSE IF (ii == field_z) THEN
214  CALL cif_get_real(parser, atom_info%r(3, natom))
215  ELSE
216  CALL parser_get_object(parser, s_tag) ! Skip this field
217  END IF
218  END DO
219  s = atom_info%r(1:3, natom)
220  CALL scaled_to_real(atom_info%r(1:3, natom), s, cell)
221  CALL parser_get_next_line(parser, 1, at_end=my_end)
222  IF (my_end) EXIT
223  END DO
224  ! Preliminary check: check if atoms provided are really unique.. this is a paranoic
225  ! check since they should be REALLY unique.. anyway..
226  DO ii = 1, natom
227  r1 = atom_info%r(1:3, ii)
228  DO jj = ii + 1, natom
229  r2 = atom_info%r(1:3, jj)
230  r = pbc(r1 - r2, cell)
231  ! check = (SQRT(DOT_PRODUCT(r, r)) >= threshold)
232  check = (dot_product(r, r) >= (threshold*threshold))
233  cpassert(check)
234  END DO
235  END DO
236  ! Parse Symmetry Group and generation elements..
237  ! Check for _symmetry_space_group_name_h-m
238  CALL parser_search_string(parser, "_symmetry_space_group_name_h-m", ignore_case=.false., found=found, &
239  begin_line=.false., search_from_begin_of_file=.true.)
240  IF (found) THEN
241  IF (iw > 0) WRITE (iw, '(A)') " CIF_INFO| _symmetry_space_group_name_h-m :: "//trim(parser%input_line(parser%icol:))
242  END IF
243 
244  ! Check for _symmetry_equiv_pos_as_xyz
245  ! Check for _space_group_symop_operation_xyz
246  CALL parser_search_string(parser, "_symmetry_equiv_pos_as_xyz", ignore_case=.false., found=found, &
247  begin_line=.false., search_from_begin_of_file=.true.)
248  IF (.NOT. found) THEN
249  CALL parser_search_string(parser, "_space_group_symop_operation_xyz", ignore_case=.false., found=found, &
250  begin_line=.false., search_from_begin_of_file=.true.)
251  END IF
252  IF (.NOT. found) &
253  CALL cp_warn(__location__, "The fields (_symmetry_equiv_pos_as_xyz) or "// &
254  "(_space_group_symop_operation_xyz) were not found in CIF file!")
255  IF (iw > 0) WRITE (iw, '(A,I0)') " CIF_INFO| Number of atoms before applying symmetry operations :: ", natom
256  IF (iw > 0) WRITE (iw, '(A10,1X,3F12.6)') (trim(id2str(atom_info%id_atmname(ii))), atom_info%r(1:3, ii), ii=1, natom)
257  isym = 0
258  IF (found) THEN
259  ! Apply symmetry elements and generate the whole set of atoms in the unit cell
260  CALL parser_get_next_line(parser, 1)
261  isym = 0
262  natom_orig = natom
263  DO WHILE ((index(parser%input_line, "loop_") == 0) .AND. (parser%input_line(1:1) /= "_"))
264  isym = isym + 1
265  ! find seprator ' or "
266  sep = "'"
267  IF (index(parser%input_line(1:), '"') > 0) sep = '"'
268  iln0 = index(parser%input_line(1:), sep)
269  iln1 = index(parser%input_line(iln0 + 1:), ",") + iln0
270  iln2 = index(parser%input_line(iln1 + 1:), ",") + iln1
271  IF (iln0 == 0) THEN
272  iln3 = len_trim(parser%input_line) + 1
273  ELSE
274  iln3 = index(parser%input_line(iln2 + 1:), sep) + iln2
275  END IF
276  cpassert(iln1 /= 0)
277  cpassert(iln2 /= iln1)
278  cpassert(iln3 /= iln2)
279  CALL initf(3)
280  CALL parsef(1, trim(parser%input_line(iln0 + 1:iln1 - 1)), s2a("x", "y", "z"))
281  CALL parsef(2, trim(parser%input_line(iln1 + 1:iln2 - 1)), s2a("x", "y", "z"))
282  CALL parsef(3, trim(parser%input_line(iln2 + 1:iln3 - 1)), s2a("x", "y", "z"))
283  loop_over_unique_atoms: DO ii = 1, natom_orig
284  CALL real_to_scaled(s_tmp, atom_info%r(1:3, ii), cell)
285  s(1) = evalf(1, (/s_tmp(1), s_tmp(2), s_tmp(3)/))
286  s(2) = evalf(2, (/s_tmp(1), s_tmp(2), s_tmp(3)/))
287  s(3) = evalf(3, (/s_tmp(1), s_tmp(2), s_tmp(3)/))
288  CALL scaled_to_real(r1, s, cell)
289  check = .true.
290  DO jj = 1, natom
291  r2 = atom_info%r(1:3, jj)
292  r = pbc(r1 - r2, cell)
293  ! SQRT(DOT_PRODUCT(r, r)) <= threshold
294  IF (dot_product(r, r) <= (threshold*threshold)) THEN
295  check = .false.
296  EXIT
297  END IF
298  END DO
299  ! If the atom generated is unique let's add to the atom set..
300  IF (check) THEN
301  natom = natom + 1
302  ! Resize in case needed
303  IF (natom > SIZE(atom_info%id_molname)) THEN
304  newsize = int(pfactor*natom)
305  CALL reallocate(atom_info%id_molname, 1, newsize)
306  CALL reallocate(atom_info%id_resname, 1, newsize)
307  CALL reallocate(atom_info%resid, 1, newsize)
308  CALL reallocate(atom_info%id_atmname, 1, newsize)
309  CALL reallocate(atom_info%r, 1, 3, 1, newsize)
310  CALL reallocate(atom_info%atm_mass, 1, newsize)
311  CALL reallocate(atom_info%atm_charge, 1, newsize)
312  CALL reallocate(atom_info%occup, 1, newsize)
313  CALL reallocate(atom_info%beta, 1, newsize)
314  CALL reallocate(atom_info%id_element, 1, newsize)
315  END IF
316  atom_info%id_atmname(natom) = atom_info%id_atmname(ii)
317  atom_info%id_molname(natom) = atom_info%id_molname(ii)
318  atom_info%id_resname(natom) = atom_info%id_resname(ii)
319  atom_info%id_element(natom) = atom_info%id_element(ii)
320  atom_info%resid(natom) = atom_info%resid(ii)
321  atom_info%r(1:3, natom) = r1
322  END IF
323  END DO loop_over_unique_atoms
324  CALL finalizef()
325  CALL parser_get_next_line(parser, 1, at_end=my_end)
326  IF (my_end) EXIT
327  END DO
328  END IF
329  IF (iw > 0) WRITE (iw, '(A,I0)') " CIF_INFO| Number of symmetry operations :: ", isym
330  IF (iw > 0) WRITE (iw, '(A,I0)') " CIF_INFO| Number of total atoms :: ", natom
331  IF (iw > 0) WRITE (iw, '(A10,1X,3F12.6)') (trim(id2str(atom_info%id_atmname(ii))), atom_info%r(1:3, ii), ii=1, natom)
332 
333  ! Releasse local cell type and parser
334  CALL cell_release(cell)
335  CALL parser_release(parser)
336 
337  ! Reallocate all structures with the exact NATOM size
338  CALL reallocate(atom_info%id_molname, 1, natom)
339  CALL reallocate(atom_info%id_resname, 1, natom)
340  CALL reallocate(atom_info%resid, 1, natom)
341  CALL reallocate(atom_info%id_atmname, 1, natom)
342  CALL reallocate(atom_info%r, 1, 3, 1, natom)
343  CALL reallocate(atom_info%atm_mass, 1, natom)
344  CALL reallocate(atom_info%atm_charge, 1, natom)
345  CALL reallocate(atom_info%occup, 1, natom)
346  CALL reallocate(atom_info%beta, 1, natom)
347  CALL reallocate(atom_info%id_element, 1, natom)
348 
349  topology%natoms = natom
350  topology%molname_generated = .true.
351  CALL cp_print_key_finished_output(iw, logger, subsys_section, &
352  "PRINT%TOPOLOGY_INFO/CIF_INFO")
353  CALL timestop(handle)
354  END SUBROUTINE read_coordinate_cif
355 
356 ! **************************************************************************************************
357 !> \brief Reads REAL from the CIF file.. This wrapper is needed in order to
358 !> treat properly the accuracy specified in the CIF file, i.e. 3.45(6)
359 !> \param parser ...
360 !> \param r ...
361 !> \date 12.2008
362 !> \author Teodoro Laino [tlaino]
363 ! **************************************************************************************************
364  SUBROUTINE cif_get_real(parser, r)
365  TYPE(cp_parser_type), INTENT(INOUT) :: parser
366  REAL(kind=dp), INTENT(OUT) :: r
367 
368  CHARACTER(LEN=default_string_length) :: s_tag
369  INTEGER :: iln
370 
371  CALL parser_get_object(parser, s_tag)
372  iln = len_trim(s_tag)
373  IF (index(s_tag, "(") /= 0) iln = index(s_tag, "(") - 1
374  READ (s_tag(1:iln), *) r
375  END SUBROUTINE cif_get_real
376 
377 END MODULE topology_cif
subroutine pbc(r, r_pbc, s, s_pbc, a, b, c, alpha, beta, gamma, debug, info, pbc0, h, hinv)
...
Definition: dumpdcd.F:1203
Handles all functions related to the CELL.
Definition: cell_methods.F:15
subroutine, public write_cell(cell, subsys_section, tag)
Write the cell parameters to the output unit.
Definition: cell_methods.F:731
subroutine, public read_cell_cif(cif_file_name, cell, para_env)
Reads cell information from CIF file.
Definition: cell_methods.F:618
subroutine, public cell_create(cell, hmat, periodic, tag)
allocates and initializes a cell
Definition: cell_methods.F:85
Handles all functions related to the CELL.
Definition: cell_types.F:15
subroutine, public scaled_to_real(r, s, cell)
Transform scaled cell coordinates real coordinates. r=h*s.
Definition: cell_types.F:516
subroutine, public real_to_scaled(s, r, cell)
Transform real to scaled cell coordinates. s=h_inv*r.
Definition: cell_types.F:486
subroutine, public cell_release(cell)
releases the given cell (see doc/ReferenceCounting.html)
Definition: cell_types.F:559
various routines to log and control the output. The idea is that decisions about where to log should ...
type(cp_logger_type) function, pointer, public cp_get_default_logger()
returns the default logger
routines to handle the output, The idea is to remove the decision of wheter to output and what to out...
integer function, public cp_print_key_unit_nr(logger, basis_section, print_key_path, extension, middle_name, local, log_filename, ignore_should_output, file_form, file_position, file_action, file_status, do_backup, on_file, is_new_file, mpi_io, fout)
...
subroutine, public cp_print_key_finished_output(unit_nr, logger, basis_section, print_key_path, local, ignore_should_output, on_file, mpi_io)
should be called after you finish working with a unit obtained with cp_print_key_unit_nr,...
Utility routines to read data from files. Kept as close as possible to the old parser because.
subroutine, public parser_get_next_line(parser, nline, at_end)
Read the next input line and broadcast the input information. Skip (nline-1) lines and skip also all ...
subroutine, public parser_search_string(parser, string, ignore_case, found, line, begin_line, search_from_begin_of_file)
Search a string pattern in a file defined by its logical unit number "unit". A case sensitive search ...
Utility routines to read data from files. Kept as close as possible to the old parser because.
subroutine, public parser_release(parser)
releases the parser
subroutine, public parser_create(parser, file_name, unit_nr, para_env, end_section_label, separator_chars, comment_char, continuation_char, quote_char, section_char, parse_white_lines, initial_variables, apply_preprocessing)
Start a parser run. Initial variables allow to @SET stuff before opening the file.
This public domain function parser module is intended for applications where a set of mathematical ex...
Definition: fparser.F:17
real(rn) function, public evalf(i, Val)
...
Definition: fparser.F:180
subroutine, public finalizef()
...
Definition: fparser.F:101
subroutine, public initf(n)
...
Definition: fparser.F:130
subroutine, public parsef(i, FuncStr, Var)
Parse ith function string FuncStr and compile it into bytecode.
Definition: fparser.F:148
objects that represent the structure of input sections and the data contained in an input section
real(kind=dp) function, public section_get_rval(section_vals, keyword_name)
...
Defines the basic variable types.
Definition: kinds.F:23
integer, parameter, public dp
Definition: kinds.F:34
integer, parameter, public default_string_length
Definition: kinds.F:57
Utility routines for the memory handling.
Interface to the message passing library MPI.
generates a unique id number for a string (str2id) that can be used two compare two strings....
Definition: string_table.F:22
character(len=default_string_length) function, public s2s(str)
converts a string in a string of default_string_length
Definition: string_table.F:141
integer function, public str2id(str)
returns a unique id for a given string, and stores the string for later retrieval using the id.
Definition: string_table.F:72
character(len=default_string_length) function, public id2str(id)
returns the string associated with a given id
Definition: string_table.F:115
Utilities for string manipulations.
Handles CIF (Crystallographic Information File) files.
Definition: topology_cif.F:13
subroutine, public read_coordinate_cif(topology, para_env, subsys_section)
Performs the real task of reading the proper information from the CIF file.
Definition: topology_cif.F:88
Control for reading in different topologies and coordinates.
Definition: topology.F:13