Skip to content

[ENG-7759] Add new task to update node metadata with verified_links #11078

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 3 commits into
base: feature/verified-resource-linking
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions api/nodes/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -249,6 +249,7 @@ class NodeSerializer(TaxonomizableSerializerMixin, JSONAPISerializer):
'template_node',
'title',
'type',
'verified_resource_links',
'view_only_links',
'wiki_enabled',
'wikis',
Expand Down Expand Up @@ -311,6 +312,8 @@ class NodeSerializer(TaxonomizableSerializerMixin, JSONAPISerializer):
links = LinksField({'html': 'get_absolute_html_url'})
# TODO: When we have osf_permissions.ADMIN permissions, make this writable for admins

verified_resource_links = ser.DictField(required=False, allow_null=True)

license = NodeLicenseRelationshipField(
related_view='licenses:license-detail',
related_view_kwargs={'license_id': '<license.node_license._id>'},
Expand Down
1 change: 1 addition & 0 deletions api/registrations/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,7 @@ class RegistrationSerializer(NodeSerializer):
ser.SerializerMethodField(help_text='When the embargo on this registration will be lifted.'),
)
custom_citation = HideIfWithdrawal(ser.CharField(allow_blank=True, required=False))
verified_resource_links = ser.DictField(required=False, allow_null=True, read_only=True)

withdrawal_justification = ser.CharField(read_only=True)
template_from = HideIfWithdrawal(
Expand Down
2 changes: 1 addition & 1 deletion api_tests/base/test_serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -196,7 +196,7 @@ def test_registration_serializer(self):
'cedar_metadata_records',
]
# fields that do not appear on registrations
non_registration_fields = ['registrations', 'draft_registrations', 'templated_by_count', 'settings', 'storage', 'children', 'groups', 'subjects_acceptable']
non_registration_fields = ['registrations', 'draft_registrations', 'templated_by_count', 'settings', 'storage', 'children', 'groups', 'subjects_acceptable', 'verified_resource_links']

for field in NodeSerializer._declared_fields:
assert field in RegistrationSerializer._declared_fields
Expand Down
26 changes: 26 additions & 0 deletions api_tests/nodes/views/test_node_detail.py
Original file line number Diff line number Diff line change
Expand Up @@ -303,6 +303,7 @@ def test_node_properties(self, app, url_public):
assert res.json['data']['attributes']['registration'] is False
assert res.json['data']['attributes']['collection'] is False
assert res.json['data']['attributes']['tags'] == []
assert res.json['data']['attributes']['verified_resource_links'] is None

def test_requesting_folder_returns_error(self, app, user):
folder = CollectionFactory(creator=user)
Expand Down Expand Up @@ -1421,6 +1422,31 @@ def test_public_project_with_publicly_editable_wiki_turns_private(
)
assert res.status_code == 200

def test_update_verified_resource_links(self, app, user, project_public, url_public):
    """PATCH ``verified_resource_links`` with a populated mapping, then clear it with ``{}``.

    Each request must return 200 and echo back exactly the mapping that was sent.
    """
    populated_links = {
        'https://doi.org/10.1234/5678': 'doi',
        'https://arxiv.org/abs/1234.5678': 'arxiv'
    }

    # First store a non-empty mapping, then overwrite it with an empty one.
    for expected_links in (populated_links, {}):
        request_body = {
            'data': {
                'id': project_public._id,
                'type': 'nodes',
                'attributes': {
                    'verified_resource_links': expected_links
                }
            }
        }
        response = app.patch_json_api(url_public, request_body, auth=user.auth)
        assert response.status_code == 200
        returned_links = response.json['data']['attributes']['verified_resource_links']
        assert returned_links == expected_links

@mock.patch('osf.models.node.update_doi_metadata_on_change')
def test_set_node_private_updates_doi(
self, mock_update_doi_metadata, app, user, project_public,
Expand Down
22 changes: 20 additions & 2 deletions osf/metadata/serializers/datacite/datacite_tree_walker.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ def walk(self, doi_override=None):
self._visit_rights(self.root)
self._visit_descriptions(self.root, self.basket.focus.iri)
self._visit_funding_references(self.root)
self._visit_related(self.root)
self._visit_related_and_verified_links(self.root)

def _visit_identifier(self, parent_el, *, doi_override=None):
if doi_override is None:
Expand Down Expand Up @@ -373,13 +373,17 @@ def _visit_related_identifier_and_item(self, identifier_parent_el, item_parent_e
self._visit_publication_year(related_item_el, related_iri)
self._visit_publisher(related_item_el, related_iri)

def _visit_related(self, parent_el):
def _visit_related_and_verified_links(self, parent_el):
# Create related identifiers element and gather relation pairs
relation_pairs = set()
for relation_iri, datacite_relation in RELATED_IDENTIFIER_TYPE_MAP.items():
for related_iri in self.basket[relation_iri]:
relation_pairs.add((datacite_relation, related_iri))

related_identifiers_el = self.visit(parent_el, 'relatedIdentifiers', is_list=True)
related_items_el = self.visit(parent_el, 'relatedItems', is_list=True)

# First add regular related identifiers
for datacite_relation, related_iri in sorted(relation_pairs):
self._visit_related_identifier_and_item(
related_identifiers_el,
Expand All @@ -388,6 +392,20 @@ def _visit_related(self, parent_el):
datacite_relation,
)

# Then add verified links to same relatedIdentifiers element
osf_item = self.basket.focus.dbmodel
verified_links = getattr(osf_item, 'verified_resource_links', None)
if verified_links:
for link, resource_type in verified_links.items():
if link and isinstance(link, str) and smells_like_iri(link):
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I assume this check is here to make sure that link actually looks like a link, which is good.

  • Is smells_like_iri() precise enough (i.e. neither too loose nor too strict)?
  • If so, it already checks for the str type, so we can skip the explicit isinstance() check.

self.visit(related_identifiers_el, 'relatedIdentifier', text=link, attrib={
'relatedIdentifierType': 'URL',
'relationType': 'References',
'resourceTypeGeneral': resource_type
})
else:
logger.warning('skipping non-URL verified link "%s"', link)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Turn this into a Sentry message. In addition, let's make it more Pythonic by using an early return and a guard clause:

if not verified_links:
    return
for ...:
    if <false condition>:
        # log sentry
        continue
    self.visit(...)


def _visit_name_identifiers(self, parent_el, agent_iri):
for identifier in sorted(self.basket[agent_iri:DCTERMS.identifier]):
identifier_type, identifier_value = self._identifier_type_and_value(identifier)
Expand Down
19 changes: 19 additions & 0 deletions osf/migrations/0029_abstractnode_verified_resource_links.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Generated by Django 4.2.15 on 2025-04-10 12:49

from django.db import migrations
import osf.utils.datetime_aware_jsonfield


# Schema migration that adds the ``verified_resource_links`` JSON field to the
# ``abstractnode`` model.
class Migration(migrations.Migration):

# Applied after migration 0028 of the ``osf`` app.
dependencies = [
('osf', '0028_collection_grade_levels_choices_and_more'),
]

operations = [
# The field is optional (blank=True, null=True) and uses the project's
# datetime-aware JSON encoder.
migrations.AddField(
model_name='abstractnode',
name='verified_resource_links',
field=osf.utils.datetime_aware_jsonfield.DateTimeAwareJSONField(blank=True, encoder=osf.utils.datetime_aware_jsonfield.DateTimeAwareJSONEncoder, null=True),
),
]
3 changes: 3 additions & 0 deletions osf/models/node.py
Original file line number Diff line number Diff line change
Expand Up @@ -294,6 +294,7 @@ class AbstractNode(DirtyFieldsMixin, TypedModel, AddonModelMixin, IdentifierMixi
'category',
'is_public',
'node_license',
'verified_resource_links',
]

# Named constants
Expand Down Expand Up @@ -387,6 +388,8 @@ class AbstractNode(DirtyFieldsMixin, TypedModel, AddonModelMixin, IdentifierMixi

schema_responses = GenericRelation('osf.SchemaResponse', related_query_name='nodes')

verified_resource_links = DateTimeAwareJSONField(null=True, blank=True)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I renamed this field and updated its options in my PR:

addon_verified_resource_links = DateTimeAwareJSONField(default=dict, blank=True)


class Meta:
base_manager_name = 'objects'
index_together = (('is_public', 'is_deleted', 'type'))
Expand Down
3 changes: 3 additions & 0 deletions website/identifiers/clients/datacite.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,9 @@ def create_identifier(self, node, category, doi_value=None):
doi_value = doi_value or self._get_doi_value(node)
metadata_record_xml = self.build_metadata(node, doi_value, as_xml=True)
if settings.DATACITE_ENABLED:
if isinstance(metadata_record_xml, bytes):
metadata_record_xml = metadata_record_xml.decode('utf-8')

resp = self._client.metadata_post(metadata_record_xml)
# Typical response: 'OK (10.70102/FK2osf.io/cq695)' to doi 10.70102/FK2osf.io/cq695
doi = re.match(r'OK \((?P<doi>[a-zA-Z0-9 .\/]{0,})\)', resp).groupdict()['doi']
Expand Down
26 changes: 26 additions & 0 deletions website/identifiers/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,3 +17,29 @@ def task__update_doi_metadata_on_change(self, target_guid):
@celery_app.task(ignore_results=True)
def update_doi_metadata_on_change(target_guid):
task__update_doi_metadata_on_change(target_guid)

@celery_app.task(bind=True, max_retries=5, acks_late=True)
def task__update_doi_metadata_with_verified_links(self, target_guid):
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The task is missing one important piece of data: the links and types sent from GV. The task should expect them to come in as arguments, and it should update the node with them. Otherwise, the identifier update and the DataCite tree walk will see an empty dict.

In addition, I am wondering whether we can make the identifier update automatic whenever this field is saved/updated on the node.

sentry.log_message('Updating DOI with verified links for guid',
extra_data={'guid': target_guid},
level=logging.INFO)

Guid = apps.get_model('osf.Guid')
target_object = Guid.load(target_guid).referent
try:

target_object.request_identifier_update(category='doi')

sentry.log_message('DOI metadata with verified links updated for guid',
extra_data={'guid': target_guid},
level=logging.INFO)
except Exception as exc:
sentry.log_message('Failed to update DOI metadata with verified links',
extra_data={'guid': target_guid, 'error': str(exc)},
level=logging.ERROR)
raise self.retry(exc=exc)

# Entry point that forwards ``target_guid`` to
# ``task__update_doi_metadata_with_verified_links``.
# NOTE(review): how and when the call is actually enqueued is decided by
# ``queued_task``, which is not visible here -- confirm against its definition.
@queued_task
@celery_app.task(ignore_results=True)
def update_doi_metadata_with_verified_links(target_guid):
task__update_doi_metadata_with_verified_links(target_guid)
Loading