Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1import re
2import time
3import numpy as np
5from ase.atoms import Atoms
6from ase.utils import reader, writer
7from ase.cell import Cell
9__all__ = ['read_rmc6f', 'write_rmc6f']
11ncols2style = {9: 'no_labels',
12 10: 'labels',
13 11: 'magnetic'}
16def _read_construct_regex(lines):
17 """
18 Utility for constructing regular expressions used by reader.
19 """
20 lines = [l.strip() for l in lines]
21 lines_re = '|'.join(lines)
22 lines_re = lines_re.replace(' ', r'\s+')
23 lines_re = lines_re.replace('(', r'\(')
24 lines_re = lines_re.replace(')', r'\)')
25 return '({})'.format(lines_re)
28def _read_line_of_atoms_section(fields):
29 """
30 Process `fields` line of Atoms section in rmc6f file and output extracted
31 info as atom id (key) and list of properties for Atoms object (values).
33 Parameters
34 ----------
35 fields: list[str]
36 List of columns from line in rmc6f file.
39 Returns
40 ------
41 atom_id: int
42 Atom ID
43 properties: list[str|float]
44 List of Atoms properties based on rmc6f style.
45 Basically, have 1) element and fractional coordinates for 'labels'
46 or 'no_labels' style and 2) same for 'magnetic' style except adds
47 the spin.
48 Examples for 1) 'labels' or 'no_labels' styles or 2) 'magnetic' style:
49 1) [element, xf, yf, zf]
50 2) [element, xf, yf, zf, spin]
51 """
52 # Atom id
53 atom_id = int(fields[0])
55 # Get style used for rmc6f from file based on number of columns
56 ncols = len(fields)
57 style = ncols2style[ncols]
59 # Create the position dictionary
60 properties = list()
61 element = str(fields[1])
62 if style == 'no_labels':
63 # id element xf yf zf ref_num ucell_x ucell_y ucell_z
64 xf = float(fields[2])
65 yf = float(fields[3])
66 zf = float(fields[4])
67 properties = [element, xf, yf, zf]
69 elif style == 'labels':
70 # id element label xf yf zf ref_num ucell_x ucell_y ucell_z
71 xf = float(fields[3])
72 yf = float(fields[4])
73 zf = float(fields[5])
74 properties = [element, xf, yf, zf]
76 elif style == 'magnetic':
77 # id element xf yf zf ref_num ucell_x ucell_y ucell_z M: spin
78 xf = float(fields[2])
79 yf = float(fields[3])
80 zf = float(fields[4])
81 spin = float(fields[10].strip("M:"))
82 properties = [element, xf, yf, zf, spin]
83 else:
84 raise Exception("Unsupported style for parsing rmc6f file format.")
86 return atom_id, properties
89def _read_process_rmc6f_lines_to_pos_and_cell(lines):
90 """
91 Processes the lines of rmc6f file to atom position dictionary and cell
93 Parameters
94 ----------
95 lines: list[str]
96 List of lines from rmc6f file.
98 Returns
99 ------
100 pos : dict{int:list[str|float]}
101 Dict for each atom id and Atoms properties based on rmc6f style.
102 Basically, have 1) element and fractional coordinates for 'labels'
103 or 'no_labels' style and 2) same for 'magnetic' style except adds
104 the spin.
105 Examples for 1) 'labels' or 'no_labels' styles or 2) 'magnetic' style:
106 1) pos[aid] = [element, xf, yf, zf]
107 2) pos[aid] = [element, xf, yf, zf, spin]
108 cell: Cell object
109 The ASE Cell object created from cell parameters read from the 'Cell'
110 section of rmc6f file.
111 """
113 # Inititalize output pos dictionary
114 pos = {}
116 # Defined the header an section lines we process
117 header_lines = [
118 "Number of atoms:",
119 "Supercell dimensions:",
120 "Cell (Ang/deg):",
121 "Lattice vectors (Ang):"]
122 sections = ["Atoms"]
124 # Construct header and sections regex
125 header_lines_re = _read_construct_regex(header_lines)
126 sections_re = _read_construct_regex(sections)
128 section = None
129 header = True
131 # Remove any lines that are blank
132 lines = [line for line in lines if line != '']
134 # Process each line of rmc6f file
135 pos = {}
136 for line in lines:
138 # check if in a section
139 m = re.match(sections_re, line)
140 if m is not None:
141 section = m.group(0).strip()
142 header = False
143 continue
145 # header section
146 if header:
147 field = None
148 val = None
150 # Regex that matches whitespace-separated floats
151 float_list_re = r'\s+(\d[\d|\s\.]+[\d|\.])'
152 m = re.search(header_lines_re + float_list_re, line)
153 if m is not None:
154 field = m.group(1)
155 val = m.group(2)
157 if field is not None and val is not None:
159 if field == "Number of atoms:":
160 pass
161 """
162 NOTE: Can just capture via number of atoms ingested.
163 Maybe use in future for a check.
164 code: natoms = int(val)
165 """
167 if field.startswith('Supercell'):
168 pass
169 """
170 NOTE: wrapping back down to unit cell is not
171 necessarily needed for ASE object.
173 code: supercell = [int(x) for x in val.split()]
174 """
176 if field.startswith('Cell'):
177 cellpar = [float(x) for x in val.split()]
178 cell = Cell.fromcellpar(cellpar)
180 if field.startswith('Lattice'):
181 pass
182 """
183 NOTE: Have questions about RMC fractionalization matrix for
184 conversion in data2config vs. the ASE matrix.
185 Currently, just support the Cell section.
186 """
188 # main body section
189 if section is not None:
190 if section == 'Atoms':
191 atom_id, atom_props = _read_line_of_atoms_section(line.split())
192 pos[atom_id] = atom_props
194 return pos, cell
197def _write_output_column_format(columns, arrays):
198 """
199 Helper function to build output for data columns in rmc6f format
201 Parameters
202 ----------
203 columns: list[str]
204 List of keys in arrays. Will be columns in the output file.
205 arrays: dict{str:np.array}
206 Dict with arrays for each column of rmc6f file that are
207 property of Atoms object.
209 Returns
210 ------
211 property_ncols : list[int]
212 Number of columns for each property.
213 dtype_obj: np.dtype
214 Data type object for the columns.
215 formats_as_str: str
216 The format for printing the columns.
218 """
219 fmt_map = {'d': ('R', '%14.6f '),
220 'f': ('R', '%14.6f '),
221 'i': ('I', '%8d '),
222 'O': ('S', '%s'),
223 'S': ('S', '%s'),
224 'U': ('S', '%s'),
225 'b': ('L', ' %.1s ')}
227 property_types = []
228 property_ncols = []
229 dtypes = []
230 formats = []
232 # Take each column and setup formatting vectors
233 for column in columns:
234 array = arrays[column]
235 dtype = array.dtype
237 property_type, fmt = fmt_map[dtype.kind]
238 property_types.append(property_type)
240 # Flags for 1d vectors
241 is_1d = len(array.shape) == 1
242 is_1d_as_2d = len(array.shape) == 2 and array.shape[1] == 1
244 # Construct the list of key pairs of column with dtype
245 if (is_1d or is_1d_as_2d):
246 ncol = 1
247 dtypes.append((column, dtype))
248 else:
249 ncol = array.shape[1]
250 for c in range(ncol):
251 dtypes.append((column + str(c), dtype))
253 # Add format and number of columns for this property to output array
254 formats.extend([fmt] * ncol)
255 property_ncols.append(ncol)
257 # Prepare outputs to correct data structure
258 dtype_obj = np.dtype(dtypes)
259 formats_as_str = ''.join(formats) + '\n'
261 return property_ncols, dtype_obj, formats_as_str
264def _write_output(filename, header_lines, data, fmt, order=None):
265 """
266 Helper function to write information to the filename
268 Parameters
269 ----------
270 filename : file|str
271 A file like object or filename
272 header_lines : list[str]
273 Header section of output rmc6f file
274 data: np.array[len(atoms)]
275 Array for the Atoms section to write to file. Has
276 the columns that need to be written on each row
277 fmt: str
278 The format string to use for writing each column in
279 the rows of data.
280 order : list[str]
281 If not None, gives a list of atom types for the order
282 to write out each.
283 """
284 fd = filename
286 # Write header section
287 for line in header_lines:
288 fd.write("%s \n" % line)
290 # If specifying the order, fix the atom id and write to file
291 natoms = data.shape[0]
292 if order is not None:
293 new_id = 0
294 for atype in order:
295 for i in range(natoms):
296 if atype == data[i][1]:
297 new_id += 1
298 data[i][0] = new_id
299 fd.write(fmt % tuple(data[i]))
300 # ...just write rows to file
301 else:
302 for i in range(natoms):
303 fd.write(fmt % tuple(data[i]))
306@reader
307def read_rmc6f(filename, atom_type_map=None):
308 """
309 Parse a RMCProfile rmc6f file into ASE Atoms object
311 Parameters
312 ----------
313 filename : file|str
314 A file like object or filename.
315 atom_type_map: dict{str:str}
316 Map of atom types for conversions. Mainly used if there is
317 an atom type in the file that is not supported by ASE but
318 want to map to a supported atom type instead.
320 Example to map deuterium to hydrogen:
321 atom_type_map = { 'D': 'H' }
323 Returns
324 ------
325 structure : Atoms
326 The Atoms object read in from the rmc6f file.
327 """
329 fd = filename
330 lines = fd.readlines()
332 # Process the rmc6f file to extract positions and cell
333 pos, cell = _read_process_rmc6f_lines_to_pos_and_cell(lines)
335 # create an atom type map if one does not exist from unique atomic symbols
336 if atom_type_map is None:
337 symbols = [atom[0] for atom in pos.values()]
338 atom_type_map = {atype: atype for atype in symbols}
340 # Use map of tmp symbol to actual symbol
341 for atom in pos.values():
342 atom[0] = atom_type_map[atom[0]]
344 # create Atoms from read-in data
345 symbols = []
346 scaled_positions = []
347 spin = None
348 magmoms = []
349 for atom in pos.values():
350 if len(atom) == 4:
351 element, x, y, z = atom
352 else:
353 element, x, y, z, spin = atom
355 element = atom_type_map[element]
356 symbols.append(element)
357 scaled_positions.append([x, y, z])
358 if spin is not None:
359 magmoms.append(spin)
361 atoms = Atoms(scaled_positions=scaled_positions,
362 symbols=symbols,
363 cell=cell,
364 magmoms=magmoms,
365 pbc=[True, True, True])
367 return atoms
370@writer
371def write_rmc6f(filename, atoms, order=None, atom_type_map=None):
372 """
373 Write output in rmc6f format - RMCProfile v6 fractional coordinates
375 Parameters
376 ----------
377 filename : file|str
378 A file like object or filename.
379 atoms: Atoms object
380 The Atoms object to be written.
382 order : list[str]
383 If not None, gives a list of atom types for the order
384 to write out each.
385 atom_type_map: dict{str:str}
386 Map of atom types for conversions. Mainly used if there is
387 an atom type in the Atoms object that is a placeholder
388 for a different atom type. This is used when the atom type
389 is not supported by ASE but is in RMCProfile.
391 Example to map hydrogen to deuterium:
392 atom_type_map = { 'H': 'D' }
393 """
395 # get atom types and how many of each (and set to order if passed)
396 atom_types = set(atoms.symbols)
397 if order is not None:
398 if set(atom_types) != set(order):
399 raise Exception("The order is not a set of the atom types.")
400 atom_types = order
402 atom_count_dict = atoms.symbols.formula.count()
403 natom_types = [str(atom_count_dict[atom_type]) for atom_type in atom_types]
405 # create an atom type map if one does not exist from unique atomic symbols
406 if atom_type_map is None:
407 symbols = set(np.array(atoms.symbols))
408 atom_type_map = {atype: atype for atype in symbols}
410 # HEADER SECTION
412 # get type and number of each atom type
413 atom_types_list = [atom_type_map[atype] for atype in atom_types]
414 atom_types_present = ' '.join(atom_types_list)
415 natom_types_present = ' '.join(natom_types)
417 header_lines = [
418 "(Version 6f format configuration file)",
419 "(Generated by ASE - Atomic Simulation Environment https://wiki.fysik.dtu.dk/ase/ )", # noqa: E501
420 "Metadata date: " + time.strftime('%d-%m-%Y'),
421 "Number of types of atoms: {} ".format(len(atom_types)),
422 "Atom types present: {}".format(atom_types_present),
423 "Number of each atom type: {}".format(natom_types_present),
424 "Number of moves generated: 0",
425 "Number of moves tried: 0",
426 "Number of moves accepted: 0",
427 "Number of prior configuration saves: 0",
428 "Number of atoms: {}".format(len(atoms)),
429 "Supercell dimensions: 1 1 1"]
431 # magnetic moments
432 if atoms.has('magmoms'):
433 spin_str = "Number of spins: {}"
434 spin_line = spin_str.format(len(atoms.get_initial_magnetic_moments()))
435 header_lines.extend([spin_line])
437 density_str = "Number density (Ang^-3): {}"
438 density_line = density_str.format(len(atoms) / atoms.get_volume())
439 cell_angles = [str(x) for x in atoms.cell.cellpar()]
440 cell_line = "Cell (Ang/deg): " + ' '.join(cell_angles)
441 header_lines.extend([density_line, cell_line])
443 # get lattice vectors from cell lengths and angles
444 # NOTE: RMCProfile uses a different convention for the fractionalization
445 # matrix
447 cell_parameters = atoms.cell.cellpar()
448 cell = Cell.fromcellpar(cell_parameters).T
449 x_line = ' '.join(['{:12.6f}'.format(i) for i in cell[0]])
450 y_line = ' '.join(['{:12.6f}'.format(i) for i in cell[1]])
451 z_line = ' '.join(['{:12.6f}'.format(i) for i in cell[2]])
452 lat_lines = ["Lattice vectors (Ang):", x_line, y_line, z_line]
453 header_lines.extend(lat_lines)
454 header_lines.extend(['Atoms:'])
456 # ATOMS SECTION
458 # create columns of data for atoms (fr_cols)
459 fr_cols = ['id', 'symbols', 'scaled_positions', 'ref_num', 'ref_cell']
460 if atoms.has('magmoms'):
461 fr_cols.extend('magmom')
463 # Collect data to be written out
464 natoms = len(atoms)
466 arrays = {}
467 arrays['id'] = np.array(range(1, natoms + 1, 1), int)
468 arrays['symbols'] = np.array(atoms.symbols)
469 arrays['ref_num'] = np.zeros(natoms, int)
470 arrays['ref_cell'] = np.zeros((natoms, 3), int)
471 arrays['scaled_positions'] = np.array(atoms.get_scaled_positions())
473 # get formatting for writing output based on atom arrays
474 ncols, dtype_obj, fmt = _write_output_column_format(fr_cols, arrays)
476 # Pack fr_cols into record array
477 data = np.zeros(natoms, dtype_obj)
478 for column, ncol in zip(fr_cols, ncols):
479 value = arrays[column]
480 if ncol == 1:
481 data[column] = np.squeeze(value)
482 else:
483 for c in range(ncol):
484 data[column + str(c)] = value[:, c]
486 # Use map of tmp symbol to actual symbol
487 for i in range(natoms):
488 data[i][1] = atom_type_map[data[i][1]]
490 # Write the output
491 _write_output(filename, header_lines, data, fmt, order=order)