(git:34ef472)
string_table.F
Go to the documentation of this file.
1 !--------------------------------------------------------------------------------------------------!
2 ! CP2K: A general program to perform molecular dynamics simulations !
3 ! Copyright 2000-2024 CP2K developers group <https://cp2k.org> !
4 ! !
5 ! SPDX-License-Identifier: GPL-2.0-or-later !
6 !--------------------------------------------------------------------------------------------------!
7 
8 ! **************************************************************************************************
9 !> \brief generates a unique id number for a string (str2id) that can be used
10 !> two compare two strings. I.e.
11 !> if (str1==str2) => str2id(str1)==str2id(str2)
12 !> if (str1.NE.str2) => str2id(str1).NE.str2id(str2)
13 !> and the other way around. Given an id, the string can be retrieved.
14 !> \note
15 !> the purpose of this routine is to speed up string handling,
16 !> string searching, ... as an operation on an int is much faster than an
17 !> operation on a long string.
18 !> \par History
19 !> 9.2006 [Joost VandeVondele]
20 !> \author Joost VandeVondele
21 ! **************************************************************************************************
23 
24  USE kinds, ONLY: default_string_length,&
25  int_8
26 #include "../base/base_uses.f90"
27 
28  IMPLICIT NONE
29 
30  ! user functions
31  PUBLIC :: str2id, id2str, s2s
32 
33  ! setup function
35 
36  PRIVATE
37  ! For good performance, the hash table should be larger than the largest number
38  ! of strings that will be saved, but the memory for an empty table is 16*hash_table_size
39  ! the string_table should remain functional for up to ~ 2**32 strings
40  INTEGER, PARAMETER :: Nbit = 16
41  INTEGER, PARAMETER :: hash_table_size = 2**nbit
42 
43  ! actual elements in the hash table
44  INTEGER, SAVE :: actual_strings
45  INTEGER, SAVE :: inserted_strings
46 
47  ! an element of the linked list of hashed strings
48 ! **************************************************************************************************
49  TYPE hash_element_type
50  CHARACTER(LEN=default_string_length), POINTER :: str => null()
51  TYPE(hash_element_type), POINTER :: next => null()
52  END TYPE
53 
54  ! the array of linked lists of hashed strings
55  TYPE(hash_element_type), SAVE, ALLOCATABLE, TARGET, DIMENSION(:) :: hash_table
56 
57 CONTAINS
58 
59 ! **************************************************************************************************
60 !> \brief returns a unique id for a given string, and stores the string for
61 !> later retrieval using the id.
62 !> \param str the string to be stored (default_string_length)
63 !> \return ...
64 !> \par History
65 !> 09.2006 created [Joost VandeVondele]
66 !> \note
67 !> pass literal strings using the s2s function,
68 !> which converts strings of any length to default_string_length
69 !> id=str2id(s2s("my short string"))
70 ! **************************************************************************************************
71  FUNCTION str2id(str) RESULT(id)
72  CHARACTER(LEN=*) :: str
73  INTEGER :: id
74 
75  INTEGER :: index, ipos
76  TYPE(hash_element_type), POINTER :: this
77 
78  inserted_strings = inserted_strings + 1
79  ! index is the index in the array, ipos is the Nth element of the linked list
80  index = joaat_hash(str)
81  ipos = 0
82  this => hash_table(index)
83  DO ! walk the list
84  IF (.NOT. ASSOCIATED(this%str)) THEN
85  ! str was not in the linked list, add it now
86  ALLOCATE (this%str)
87  this%str = str
88  actual_strings = actual_strings + 1
89  EXIT
90  ELSE
91  IF (this%str == str) THEN
92  ! str is in the list already
93  EXIT
94  ELSE
95  IF (.NOT. ASSOCIATED(this%next)) ALLOCATE (this%next)
96  ipos = ipos + 1
97  this => this%next
98  END IF
99  END IF
100  END DO
101  id = ior(index, ishft(ipos, nbit))
102  END FUNCTION str2id
103 
104 ! **************************************************************************************************
105 !> \brief returns the string associated with a given id
106 !> \param id the id to be converted into a string
107 !> \return ...
108 !> \par History
109 !> 09.2006 created [Joost VandeVondele]
110 !> \note
111 !> only id's of previously 'registered' strings (str2id) should be passed,
112 !> otherwise things crash
113 ! **************************************************************************************************
114  FUNCTION id2str(id) RESULT(str)
115  INTEGER :: id
116  CHARACTER(LEN=default_string_length) :: str
117 
118  INTEGER :: i, index, ipos
119  TYPE(hash_element_type), POINTER :: this
120 
121  index = iand(id, 2**nbit - 1)
122  ipos = ishft(id, -nbit)
123  this => hash_table(index)
124  DO i = 1, ipos
125  this => this%next
126  END DO
127  str = this%str
128  END FUNCTION id2str
129 
130 ! **************************************************************************************************
131 !> \brief converts a string in a string of default_string_length
132 !> \param str ...
133 !> \return ...
134 !> \par History
135 !> 09.2006 created [Joost VandeVondele]
136 !> \note
137 !> useful to pass a literal string to str2id
138 !> i.e. id=str2id(s2s("X"))
139 ! **************************************************************************************************
140  FUNCTION s2s(str) RESULT(res)
141  CHARACTER(LEN=*) :: str
142  CHARACTER(LEN=default_string_length) :: res
143 
144  res = str
145  END FUNCTION s2s
146 
147 ! **************************************************************************************************
148 !> \brief allocates the string table
149 !> \par History
150 !> 09.2006 created [Joost VandeVondele]
151 !> \note
152 !> this needs to be done only once at program startup, before any use
153 !> of other procedures of this module. The scope of this table is global
154 ! **************************************************************************************************
156  ALLOCATE (hash_table(0:hash_table_size - 1))
157  actual_strings = 0
158  inserted_strings = 0
159  END SUBROUTINE string_table_allocate
160 
161 ! **************************************************************************************************
162 !> \brief deallocates the string table
163 !> \param iw a unit to which some info about the table usage can be printed
164 !> \par History
165 !> 09.2006 created [Joost VandeVondele]
166 !> \note
167 !> This should be done before program termination, all associated ids become meaningless
168 ! **************************************************************************************************
169  SUBROUTINE string_table_deallocate(iw)
170  INTEGER, INTENT(IN) :: iw
171 
172  INTEGER :: i, ilist, ipos, ipos_max
173  TYPE(hash_element_type), POINTER :: next, this
174 
175 ! clean up all the linked lists of entries
176 
177  ipos_max = 0
178  ilist = 0
179  DO i = 0, hash_table_size - 1
180  ipos = 1
181  IF (ASSOCIATED(hash_table(i)%str)) THEN
182  DEALLOCATE (hash_table(i)%str)
183  ilist = ilist + 1
184  END IF
185  this => hash_table(i)%next
186  DO WHILE (ASSOCIATED(this))
187  ipos = ipos + 1
188  next => this%next
189  IF (ASSOCIATED(this%str)) DEALLOCATE (this%str)
190  DEALLOCATE (this)
191  this => next
192  END DO
193  ipos_max = max(ipos_max, ipos)
194  END DO
195  DEALLOCATE (hash_table)
196  IF (iw > 0) THEN
197  WRITE (iw, *) "string table: # inserted str = ", inserted_strings
198  WRITE (iw, *) " # actual = ", actual_strings
199  WRITE (iw, *) " # lists = ", ilist, " / ", hash_table_size
200  WRITE (iw, *) " longest list = ", ipos_max
201  END IF
202  actual_strings = 0
203  inserted_strings = 0
204  END SUBROUTINE string_table_deallocate
205 
206 ! **************************************************************************************************
207 !> \brief generates the hash of a string and the index in the table
208 !> \param key a string of any length
209 !> \return ...
210 !> \par History
211 !> 09.2006 created [Joost VandeVondele]
212 !> \note
213 !> http://en.wikipedia.org/wiki/Hash_table
214 !> http://www.burtleburtle.net/bob/hash/doobs.html
215 !> However, since fortran doesn't have an unsigned 4 byte int
216 !> we compute it using an integer with the appropriate range
217 !> we return already the index in the table as a final result
218 ! **************************************************************************************************
219  FUNCTION joaat_hash(key) RESULT(hash_index)
220  CHARACTER(LEN=*), INTENT(IN) :: key
221  INTEGER :: hash_index
222 
223  INTEGER(KIND=int_8), PARAMETER :: b32 = 2_int_8**32 - 1_int_8
224 
225  INTEGER :: i
226  INTEGER(KIND=int_8) :: hash
227 
228  hash = 0_int_8
229  DO i = 1, len(key)
230  hash = iand(hash + ichar(key(i:i)), b32)
231  hash = iand(hash + iand(ishft(hash, 10), b32), b32)
232  hash = iand(ieor(hash, iand(ishft(hash, -6), b32)), b32)
233  END DO
234  hash = iand(hash + iand(ishft(hash, 3), b32), b32)
235  hash = iand(ieor(hash, iand(ishft(hash, -11), b32)), b32)
236  hash = iand(hash + iand(ishft(hash, 15), b32), b32)
237  ! hash is the real 32bit hash value of the string,
238  ! hash_index is an index in the hash_table
239  hash_index = int(mod(hash, int(hash_table_size, kind=int_8)))
240  END FUNCTION joaat_hash
241 END MODULE string_table
static unsigned int hash(const dbm_task_t task)
Private hash function based on Szudzik's elegant pairing. Using unsigned int to return a positive num...
Defines the basic variable types.
Definition: kinds.F:23
integer, parameter, public int_8
Definition: kinds.F:54
integer, parameter, public default_string_length
Definition: kinds.F:57
generates a unique id number for a string (str2id) that can be used two compare two strings....
Definition: string_table.F:22
character(len=default_string_length) function, public s2s(str)
converts a string in a string of default_string_length
Definition: string_table.F:141
integer function, public str2id(str)
returns a unique id for a given string, and stores the string for later retrieval using the id.
Definition: string_table.F:72
character(len=default_string_length) function, public id2str(id)
returns the string associated with a given id
Definition: string_table.F:115
subroutine, public string_table_deallocate(iw)
deallocates the string table
Definition: string_table.F:170
subroutine, public string_table_allocate()
allocates the string table
Definition: string_table.F:156