51 #include <basic/options/option.hh>
52 #include <basic/Tracer.hh>
55 #include <basic/options/option.hh>
56 #include <basic/options/keys/out.OptionKeys.gen.hh>
57 #include <basic/options/keys/run.OptionKeys.gen.hh>
58 #include <basic/options/keys/in.OptionKeys.gen.hh>
59 #include <basic/options/keys/inout.OptionKeys.gen.hh>
60 #include <basic/options/keys/packing.OptionKeys.gen.hh>
63 #include <numeric/random/random.hh>
65 #include <utility/string_util.hh>
66 #include <utility/io/ozstream.hh>
67 #include <utility/io/izstream.hh>
68 #include <utility/exit.hh>
75 #include <ObjexxFCL/format.hh>
78 #include <basic/options/keys/out.OptionKeys.gen.hh>
81 #include <basic/options/keys/inout.OptionKeys.gen.hh>
86 #include <utility/vector1.hh>
100 using basic::Warning;
105 using namespace ObjexxFCL;
106 using namespace ObjexxFCL::fmt;
109 static basic::Tracer
TR(
"core.io.pdb.file_data");
112 static numeric::random::RandomGenerator
RG(231411);
116 static string const chr_chains(
"ABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890abcdefghijklmnopqrstuvwxyz" );
133 resName( ai.resName ),
137 terCount( ai.terCount ),
157 return !(*
this == that);
174 using namespace core;
182 && !(pdb_info->obsolete())
183 && !(basic::options::option[ basic::options::OptionKeys::out::file::renumber_pdb ].value()) ) {
187 bool renumber_chains(
false);
188 if ( basic::options::option[ basic::options::OptionKeys::out::file::per_chain_renumbering ].value() ) {
189 renumber_chains =
true;
197 if ( !basic::options::option[ basic::options::OptionKeys::out::file::output_virtual ]() &&
201 if ( use_PDB && basic::options::option[ basic::options::OptionKeys::out::file::suppress_zero_occ_pdb_output ]() &&
202 ( rsd.
seqpos() <= pdb_info->nres() ) ) {
203 if ( pdb_info->occupancy( rsd.
seqpos(), j ) < 0.0001 )
continue;
215 ai.
x = atom.xyz()(1);
216 ai.
y = atom.xyz()(2);
217 ai.
z = atom.xyz()(3);
221 if ( use_PDB && rsd.
seqpos() <= pdb_info->nres() ) {
226 <<
"' denoting empty record, for convenience replacing with space" << std::endl;
233 if ( pdb_info->is_het( rsd.
seqpos(), j ) ) {
241 runtime_assert( rsd.
chain() > 0 );
246 if ( renumber_chains ) {
249 for (
Size i=1; i<=chn_ends.size(); ++i ) {
298 std::vector<Record> & VR
321 link.
name1_ = record[
"name1"].value;
322 link.resName1_ = record[
"resName1"].value;
323 link.resID1_ = record[
"resSeq1"].value + record[
"iCode1"].value + record[
"chainID1"].value;
325 link.name2_ = record[
"name2"].value;
326 link.resName2_ = record[
"resName2"].value;
327 link.resID2_ = record[
"resSeq2"].value + record[
"iCode2"].value + record[
"chainID2"].value;
329 link.length_ = atof(record[
"length"].value.c_str());
331 links[link.resID1_] = link;
333 TR.Debug <<
"LINK record information stored successfully." << std::endl;
344 using namespace core::chemical::carbohydrates;
347 TR.Warning <<
"PDB HETNAM record is missing an heterogen ID field." << endl;
351 TR.Warning <<
"PDB HETNAM chemical name field is an empty string." << endl;
358 name.append(rstripped_whitespace(text));
365 if (CarbohydrateInfo::CODE_TO_ROOT_MAP.count(hetID)) {
388 string resSeq =
string(text.begin() + 1, text.begin() + 5);
389 string iCode =
string(text.begin() + 5, text.begin() + 6);
392 string needed_residue_type_base_name =
string(text.begin() + 7, text.end());
403 using namespace core;
411 if(pose.
pdb_info()->header_information()){
420 for (
Size i=1; i<= nres; ++i ) {
434 using namespace core;
440 end( residue_indices.end() ); index !=
end; ++index ) {
441 if ( *index < 1 || *index > nres ) { runtime_assert(
false);
continue; }
460 out.write( data.c_str(), data.size() );
469 string const & file_name,
471 bool write_fold_tree)
473 utility::io::ozstream file(file_name.c_str(), std::ios::out | std::ios::binary);
475 Error() <<
"FileData::dump_pdb: Unable to open file:" << file_name <<
" for writing!!!" << std::endl;
478 dump_pdb(pose, file, tag, write_fold_tree);
502 out.write( data.c_str(), data.size() );
517 os <<
"Chain<" << i <<
">";
519 os <<
"[" << j <<
":" << fd.
chains[i][j] <<
"]" <<
"\n";
531 if ( name ==
" DA" )
return " A";
532 else if ( name ==
" DC" )
return " C";
533 else if ( name ==
" DG" )
return " G";
534 else if ( name ==
" DT" )
return " T";
535 else if ( name ==
" Ad" )
return " A";
536 else if ( name ==
" Cd" )
return " C";
537 else if ( name ==
" Gd" )
return " G";
538 else if ( name ==
" Td" )
return " T";
539 else if ( name ==
"MSE" ) {
540 TR <<
"Reading MSE as MET!" << std::endl;
549 if( atom_name.size() != 4 ){
550 std::string message= res_name+
" has atom "+ atom_name+
", with size!=4";
551 utility_exit_with_message(message);
554 if ( res_name ==
"5MC" ||
561 if ( atom_name ==
" OP1" )
return " O1P";
562 if ( atom_name ==
" OP2" )
return " O2P";
563 if ( atom_name[3] ==
'\'' )
return atom_name.substr(0,3)+
"*";
564 if ( res_name ==
" T" && atom_name ==
" C7 " )
return " C5M";
565 }
else if ( res_name ==
"MET" && atom_name ==
" S " ) {
567 }
else if ( res_name ==
"MET" && atom_name ==
"SE " ) {
568 TR <<
"Reading Selenium SE from MSE as SD from MET" << std::endl;
593 using namespace basic::options;
594 using namespace basic::options::OptionKeys;
620 TR.Warning <<
"PDB reader is ignoring atom " << atom_name <<
" in residue " << resid
621 <<
". Pass flag -ignore_zero_occupancy false to change this behavior" << std::endl;
631 new_res.
resid = resid;
632 if( rinfo.size() == 0 || rinfo.back() != new_res ) rinfo.push_back(new_res);
636 if( curr_res.
xyz.count(ai.
name) == 0 ) {
637 curr_res.
atoms.push_back(ai);
639 curr_res.
xyz[ ai.
name ] = coords;
652 left_justify( trimmed_name ); trim( trimmed_name );
664 if ( ai.
x == 0.000 && ai.
y == 0.000 && ai.
z == 0.000 && ai.
occupancy <= 0.0 ){
668 ai.
x = ai.
x + 900.000 +
RG.uniform()*100.000;
669 ai.
y = ai.
y + 900.000 +
RG.uniform()*100.000;
670 ai.
z = ai.
z + 900.000 +
RG.uniform()*100.000;
686 using namespace basic::options;
693 if ( write_fold_tree || option[ OptionKeys::inout::fold_tree_io ].user() ) {
696 if ( basic::options::option[ OptionKeys::out::file::pdb_parents]() ) {
700 out <<
"REMARK PARENT " << value.substr(0,5) << std::endl;
703 if(basic::options::option[ basic::options::OptionKeys::out::file::output_orbitals]){
708 char const chain( chains[ rsd.chain() ] );
710 if(rsd.atom_type(j).atom_has_orbital()){
714 orbital_index = orbital_indices.begin(),
715 orbital_index_end = orbital_indices.end();
716 orbital_index != orbital_index_end; ++orbital_index
719 Vector orbital_xyz(rsd.orbital_xyz(*orbital_index));
720 out <<
"ATOM " << I(5,number) <<
' ' << rsd.orbital_name(*orbital_index) <<
' ' <<
721 rsd.name3() <<
' ' << chain << I(4,rsd.seqpos() ) <<
" " <<
722 F(8,3,orbital_xyz.x()) <<
723 F(8,3,orbital_xyz.y()) <<
724 F(8,3,orbital_xyz.z()) <<
725 F(6,2,1.0) << F(6,2,1.0) <<
'\n';
731 if (basic::options::option[ basic::options::OptionKeys::out::file::output_torsions ]){
733 TR <<
"Ignoring out::file::output_torsions option because pose is non-ideal!" << std::endl;
735 ObjexxFCL::FArray1D_char dssp_reduced_secstruct(pose.
n_residue());
737 out <<
"REMARK torsions: res pdbres pdbchain seq dssp phi psi omega" << std::endl;
739 out <<
"REMARK " << I( 4, i ) <<
" " << I( 4, pose.
pdb_info()->number(i)) <<
" " << pose.
pdb_info()->chain(i) <<
" " << pose.
residue( i ).
name1() <<
" " <<
740 dssp_reduced_secstruct(i) <<
" " << F( 9, 3, pose.
phi(i)) <<
" " << F( 9, 3, pose.
psi(i)) <<
" " << F( 9, 3, pose.
omega(i)) << std::endl;
763 using namespace chemical;
788 utility::io::izstream file( filename );
790 TR.Error <<
"File:" << filename <<
" not found!" << std::endl;
791 utility_exit_with_message(
"Cannot open file " + filename );
793 TR.Debug <<
"read file: " << filename << std::endl;
796 utility::slurp( file, all_lines );
830 typedef std::map< std::string, ResidueTemps >
Temps;
832 typedef std::map< std::string, ResidueCoords >
Coords;
835 using namespace chemical;
836 using namespace conformation;
853 Strings branch_lower_termini;
855 int const nres_pdb( rinfos.size() );
863 std::string::const_iterator
const entities_begin = chains_whose_residues_are_separate_chemical_entities.begin();
864 std::string::const_iterator
const entities_end = chains_whose_residues_are_separate_chemical_entities.end();
867 std::string::const_iterator
const check_Ntermini_begin = chains_to_check_if_Ntermini.begin();
868 std::string::const_iterator
const check_Ntermini_end = chains_to_check_if_Ntermini.end();
870 std::string::const_iterator
const check_Ctermini_begin = chains_to_check_if_Ctermini.begin();
871 std::string::const_iterator
const check_Ctermini_end = chains_to_check_if_Ctermini.end();
875 bool last_residue_was_recognized(
true);
879 for (
int i=1; i<= nres_pdb; ++i ) {
885 runtime_assert( resid.size() == 6 );
886 bool const separate_chemical_entity = find(entities_begin, entities_end, chainID ) != entities_end;
887 bool const same_chain_prev = ( i > 1 && chainID == rinfos[i-1].chainID &&
888 rinfo.
terCount == rinfos[i-1].terCount && !separate_chemical_entity);
889 bool const same_chain_next = ( i < nres_pdb && chainID == rinfos[i+1].chainID &&
890 rinfo.
terCount == rinfos[i+1].terCount && !separate_chemical_entity);
891 bool const check_Ntermini_for_this_chain = (
"ALL" == chains_to_check_if_Ntermini) ?
892 true : find(check_Ntermini_begin, check_Ntermini_end, chainID ) == check_Ntermini_end;
893 bool const check_Ctermini_for_this_chain = (
"ALL" == chains_to_check_if_Ctermini) ?
894 true : find(check_Ctermini_begin, check_Ctermini_end, chainID ) == check_Ctermini_end;
897 bool const is_branch_point = fd.
links.count(resid);
898 if (is_branch_point) {
900 branch_lower_termini.push_back(fd.
links[resid].resID2_);
902 bool const is_branch_lower_terminus = branch_lower_termini.contains(resid);
903 bool const is_lower_terminus( ( i == 1 || rinfos.empty() || (!same_chain_prev && !is_branch_lower_terminus) ) && check_Ntermini_for_this_chain );
904 bool const is_upper_terminus( ( i == nres_pdb || !same_chain_next ) && check_Ctermini_for_this_chain );
906 TR.Debug <<
"Residue " << i << std::endl;
907 if (is_lower_terminus) {
908 TR.Debug <<
"...is a lower terminus." << std::endl;
910 if (is_upper_terminus) {
911 TR.Debug <<
"...is an upper terminus." << std::endl;
913 if (is_branch_point) {
914 TR.Debug <<
"...is a branch point." << std::endl;
916 if (is_branch_lower_terminus) {
917 TR.Debug <<
"...is the lower terminus of a branch." << std::endl;
920 ResidueCoords
const &
xyz = rinfo.
xyz;
921 ResidueTemps
const & rtemp = rinfo.
temps;
926 i, pdb_name, rsd_type_list, xyz, rtemp,
927 UA_res_nums, UA_res_names, UA_atom_names, UA_coords, UA_temps, options)) {
928 last_residue_was_recognized =
false;
935 Size best_index(0), best_rsd_missing( 99999 ), best_xyz_missing( 99999 );
938 for (
Size j=1; j<= rsd_type_list.size(); ++j ) {
939 ResidueType const & rsd_type( *(rsd_type_list[j]) );
940 bool const is_polymer( rsd_type.
is_polymer() );
950 TR.Debug <<
"Discarding '" << rsd_type.
name() <<
"' ResidueType" << std::endl;
955 TR.Debug <<
"Discarding '" << rsd_type.
name() <<
"' ResidueType" << std::endl;
960 TR.Debug <<
"Discarding '" << rsd_type.
name() <<
"' ResidueType" << std::endl;
965 TR.Debug <<
"Discarding '" << rsd_type.
name() <<
"' ResidueType" << std::endl;
971 TR.Debug <<
"Discarding '" << rsd_type.
name() <<
"' ResidueType" << std::endl;
976 TR.Debug <<
"Discarding '" << rsd_type.
name() <<
"' ResidueType" << std::endl;
981 TR.Debug <<
"Trying '" << rsd_type.
name() <<
"' ResidueType" << std::endl;
983 Size rsd_missing(0), xyz_missing(0);
985 for (
Size k=1; k<= rsd_type.
natoms(); ++k ) {
986 if ( xyz.count( rsd_type.
atom_name(k) ) == 0 ) ++xyz_missing;
989 for ( ResidueCoords::const_iterator iter=xyz.begin(), iter_end=xyz.end(); iter!= iter_end; ++iter ) {
991 !( iter->first ==
" H " && is_lower_terminus ) ) {
996 if ( ( rsd_missing < best_rsd_missing ) ||
997 ( rsd_missing == best_rsd_missing && xyz_missing < best_xyz_missing ) ) {
998 best_rsd_missing = rsd_missing;
999 best_xyz_missing = xyz_missing;
1009 utility_exit_with_message(
"Unrecognized residue: " + pdb_name );
1012 ResidueType const & rsd_type( *(rsd_type_list[ best_index ]) );
1013 TR.Debug <<
"Match: '" << rsd_type.
name() <<
"'; missing " << best_xyz_missing <<
" coordinates" << std::endl;
1015 if ( best_rsd_missing ) {
1016 TR <<
"[ WARNING ] discarding " << best_rsd_missing <<
" atoms at position " << i <<
1017 " in file " << fd.
filename <<
". Best match rsd_type: " << rsd_type.
name() << std::endl;
1023 Size const nbb( mainchain.size() );
1025 bool mainchain_core_present(
false );
1026 for (
Size k=1; k<= nbb-2; ++k ) {
1027 if ( xyz.count( rsd_type.
atom_name(mainchain[k ])) &&
1028 xyz.count( rsd_type.
atom_name(mainchain[k+1])) &&
1029 xyz.count( rsd_type.
atom_name(mainchain[k+2])) ) {
1030 mainchain_core_present =
true;
1034 if ( !mainchain_core_present ) {
1035 TR <<
"[ WARNING ] skipping pdb residue b/c its missing too many mainchain atoms: " << resid <<
1036 ' ' << pdb_name <<
' ' << rsd_type.
name() << std::endl;
1037 for (
Size k=1; k<= nbb; ++k ) {
1038 if ( !xyz.count( rsd_type.
atom_name(mainchain[k] ) ) ) {
1039 TR <<
"missing: " << rsd_type.
atom_name( mainchain[k] ) << std::endl;
1043 utility_exit_with_message(
"quitting due to missing heavy atoms");
1053 for ( ResidueCoords::const_iterator iter=xyz.begin(), iter_end=xyz.end(); iter!= iter_end; ++iter ) {
1060 double offset = 1e-250;
1080 if ( ( is_lower_terminus || !check_Ntermini_for_this_chain ) ||
1081 is_branch_lower_terminus ||
1082 !new_rsd->is_polymer() ||
1084 !last_residue_was_recognized) {
1090 pose_to_rinfo.push_back(
Size(i) );
1091 pose_resids.push_back( rinfo.
resid );
1092 pose_temps.push_back( rinfo.
temps );
1096 if ( ( ( is_lower_terminus || !check_Ntermini_for_this_chain ) || is_branch_lower_terminus) && pose.
total_residue() > 1 ) {
1100 last_residue_was_recognized =
true;
1108 for (
Size i=1; i<= nres; ++i ) {
1112 bool const check_Ntermini_for_this_chain = (
"ALL" == chains_to_check_if_Ntermini) ?
1113 true : find(check_Ntermini_begin, check_Ntermini_end, chainID ) == check_Ntermini_end;
1114 bool const check_Ctermini_for_this_chain = (
"ALL" == chains_to_check_if_Ctermini) ?
1115 true : find(check_Ctermini_begin, check_Ctermini_end, chainID ) == check_Ctermini_end;
1117 if ( !check_Ntermini_for_this_chain )
continue;
1118 if ( !check_Ctermini_for_this_chain )
continue;
1127 TR <<
"Adding undetected lower terminus type to residue " << i << std::endl;
1135 TR <<
"Adding undetected upper terminus type to residue " << i << std::endl;
1144 Size num_heavy_missing = 0;
1151 for(
Size i = 1; i <= UA_res_nums.size(); ++i ) {
1152 pdb_info->add_unrecognized_atom( UA_res_nums[i], UA_res_names[i], UA_atom_names[i], UA_coords[i], UA_temps[i] );
1158 utility_exit_with_message(
"ERROR: No residues in pose, empty file ? " );
1162 ResidueCoords
const &
xyz( rinfos[pose_to_rinfo[i]].
xyz );
1165 for (
Size j=1; j<= rsd.natoms(); ++j ) {
1166 if ( xyz.count( rsd.atom_name(j) ) == 0 ) {
1168 if( !rsd.atom_is_hydrogen(j) ) num_heavy_missing++;
1180 for (
Size i(1); i <= nres; ++i ) {
1185 std::istringstream ss( resid );
1187 pdb_numbering.push_back( resid_num );
1189 char const chain( rinfo.
resid[5] );
1190 pdb_chains.push_back( chain );
1192 char const icode( rinfo.
resid[4] );
1193 insertion_codes.push_back( icode );
1208 pdb_info->remarks( *fd.
remarks );
1213 pdb_info->set_numbering( pdb_numbering );
1214 pdb_info->set_chains( pdb_chains );
1215 pdb_info->set_icodes( insertion_codes );
1221 for (
uint seqpos(1), nres( pose.
total_residue() ); seqpos <= nres; ++seqpos ) {
1223 if ( ! rsd.type().is_DNA() )
continue;
1224 for (
uint atomi(1), natoms( rsd.natoms() ); atomi <= natoms; ++atomi ) {
1226 if ( missing[
id ] && rsd.atom_name(atomi) ==
" P " ) {
1227 TR <<
"Virtualizing missing phosphate that was built in at seqpos " << seqpos << std::endl;
1251 pdb_info->resize_atom_records( pose );
1254 for(
Size i = 1; i <= UA_res_nums.size(); ++i ) {
1255 pdb_info->add_unrecognized_atom( UA_res_nums[i], UA_res_names[i], UA_atom_names[i], UA_coords[i], UA_temps[i] );
1261 ResidueTemps & res_temps( rinfos[pose_to_rinfo[ir]].temps );
1262 for( ResidueTemps::const_iterator iter=res_temps.begin(); iter != res_temps.end(); ++iter ) {
1266 pdb_info->temperature( ir, ia, iter->second );
1268 if( (iter->first)[0] ==
'H' || ((iter->first)[0] ==
' ' && (iter->first)[1] ==
'H') ) {
1271 TR <<
"[ WARNING ] can't find atom for res " << ir <<
" atom " << iter->first <<
" (trying to set temp)" << std::endl;
1278 pdb_info->obsolete(
false );
1293 Size const pdb_residue_index,
1296 std::map< std::string, Vector >
const &
xyz,
1297 std::map< std::string, double >
const & rtemp,
1305 return is_residue_type_recognized( pdb_residue_index, pdb_name, rsd_type_list, xyz, rtemp, UA_res_nums, UA_res_names, UA_atom_names, UA_coords, UA_temps, options );
1319 Size const pdb_residue_index,
1322 std::map< std::string, Vector >
const &
xyz,
1323 std::map< std::string, double >
const & rtemp,
1331 if(!rsd_type_list.empty()){
1335 using namespace basic::options;
1340 utility_exit_with_message(
" unrecognized aa " + pdb_name );
1345 if( pdb_name ==
"HOH" ){
1351 for(std::map<std::string, Vector>::const_iterator iter=xyz.begin(), iter_end=xyz.end(); iter!= iter_end; ++iter ) {
1352 if( UA_res_nums.size() > 5000 ) {
1353 utility_exit_with_message(
"can't handle more than 5000 atoms worth of unknown residues\n");
1355 TR <<
"remember unrecognized atom " << pdb_residue_index <<
" " << pdb_name <<
" " <<
local_strip_whitespace(iter->first)
1356 <<
" temp " << rtemp.find(iter->first)->second << std::endl;
1357 UA_res_nums.push_back( pdb_residue_index );
1358 UA_res_names.push_back( pdb_name );
1360 UA_coords.push_back( iter->second );
1361 UA_temps.push_back( rtemp.find(iter->first)->second );
1384 using namespace chemical;
1388 pose_from_pose( new_pose, old_pose, *residue_set, residue_indices, options);
1399 pose_from_pose( new_pose, old_pose, residue_set, residue_indices, options );