Skip to content

Update findseq for multiple objects #161

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
92 changes: 46 additions & 46 deletions findseq.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,21 +70,6 @@


def findseq(needle, haystack, selName=None, het=0, firstOnly=0):
# set the name of the selection to return.
if selName == None:
rSelName = "foundSeq" + str(random.randint(0, 32000))
selName = rSelName
elif selName == "sele":
rSelName = "sele"
else:
rSelName = selName

# input checking
if not checkParams(needle, haystack, selName, het, firstOnly):
print("There was an error with a parameter. Please see")
print("the above error message for how to fix it.")
return None

one_letter = {
'00C': 'C', '01W': 'X', '0A0': 'D', '0A1': 'Y', '0A2': 'K',
'0A8': 'C', '0AA': 'V', '0AB': 'V', '0AC': 'G', '0AD': 'G',
Expand Down Expand Up @@ -335,43 +320,58 @@ def findseq(needle, haystack, selName=None, het=0, firstOnly=0):
'YG ': 'G', 'YOF': 'Y', 'YRR': 'N', 'YYG': 'G', 'Z ': 'C',
'ZAD': 'A', 'ZAL': 'A', 'ZBC': 'C', 'ZCY': 'C', 'ZDU': 'U',
'ZFB': 'X', 'ZGU': 'G', 'ZHP': 'N', 'ZTH': 'T', 'ZZJ': 'A'}

# remove hetero atoms (waters/ligands/etc) from consideration?
if het:
cmd.select("__h", "br. " + haystack)
# set the name of the selection to return.
if selName == None:
rSelName = "foundSeq" + str(random.randint(0, 32000))
selName = rSelName
elif selName == "sele":
rSelName = "sele"
else:
cmd.select("__h", "br. " + haystack + " and not het")
rSelName = selName
# make an empty selection to which we add residues
cmd.select(rSelName, 'None')
for obj in cmd.get_object_list(haystack):
# input checking
if not checkParams(needle, haystack, selName, het, firstOnly):
print("There was an error with a parameter. Please see")
print("the above error message for how to fix it.")
return None

# get the AAs in the haystack
aaDict = {'aaList': []}
cmd.iterate("(name ca) and __h", "aaList.append((resi,resn,chain))", space=aaDict)

IDs = [x[0] for x in aaDict['aaList']]
AAs = ''.join([one_letter[x[1]] for x in aaDict['aaList']])
chains = [x[2] for x in aaDict['aaList']]
# remove hetero atoms (waters/ligands/etc) from consideration?
if het:
cmd.select("__h", f"br. {obj} and {haystack}")
else:
cmd.select("__h", f"br. {obj} and {haystack} and not het")

reNeedle = re.compile(needle.upper())
it = reNeedle.finditer(AAs)
# get the AAs in the haystack
aaDict = {'aaList': []}
cmd.iterate("(name ca) and __h", "aaList.append((resi,resn,chain))", space=aaDict)

# make an empty selection to which we add residues
cmd.select(rSelName, 'None')
IDs = [x[0] for x in aaDict['aaList']]
AAs = ''.join([one_letter[x[1]] for x in aaDict['aaList']])
chains = [x[2] for x in aaDict['aaList']]

for i in it:
(start, stop) = i.span()
# we found some residues, which chains are they from?
i_chains = chains[start:stop]
# are all residues from one chain?
if len(set(i_chains)) != 1:
# now they are not, this match is not really a match, skip it
continue
chain = i_chains[0]
# Only apply chains to selection algebra if there are defined chains.
if chain:
cmd.select(rSelName, rSelName + " or (__h and i. " + '+'.join(IDs[ii] for ii in range(start, stop)) + " and c. " + chain + " )")
else:
cmd.select(rSelName, rSelName + " or (__h and i. " + '+'.join(IDs[ii] for ii in range(start, stop)) + ")")
if int(firstOnly):
break
reNeedle = re.compile(needle.upper())
it = reNeedle.finditer(AAs)


for i in it:
(start, stop) = i.span()
# we found some residues, which chains are they from?
i_chains = chains[start:stop]
# are all residues from one chain?
if len(set(i_chains)) != 1:
# now they are not, this match is not really a match, skip it
continue
chain = i_chains[0]
# Only apply chains to selection algebra if there are defined chains.
if chain:
cmd.select(rSelName, rSelName + " or (__h and i. " + '+'.join(IDs[ii] for ii in range(start, stop)) + " and c. " + chain + " )")
else:
cmd.select(rSelName, rSelName + " or (__h and i. " + '+'.join(IDs[ii] for ii in range(start, stop)) + ")")
if int(firstOnly):
break
cmd.delete("__h")
return rSelName
# Register the findseq function under the command name "findseq" via
# cmd.extend. NOTE(review): `cmd` is presumably PyMOL's command module;
# confirm against the file's imports, which are not visible here.
cmd.extend("findseq", findseq)
Expand Down