-
Notifications
You must be signed in to change notification settings - Fork 344
[ENG-7759] Add new task to update node metadata with verified_links #11078
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: feature/verified-resource-linking
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -113,7 +113,7 @@ def walk(self, doi_override=None): | |
self._visit_rights(self.root) | ||
self._visit_descriptions(self.root, self.basket.focus.iri) | ||
self._visit_funding_references(self.root) | ||
self._visit_related(self.root) | ||
self._visit_related_and_verified_links(self.root) | ||
|
||
def _visit_identifier(self, parent_el, *, doi_override=None): | ||
if doi_override is None: | ||
|
@@ -373,13 +373,17 @@ def _visit_related_identifier_and_item(self, identifier_parent_el, item_parent_e | |
self._visit_publication_year(related_item_el, related_iri) | ||
self._visit_publisher(related_item_el, related_iri) | ||
|
||
def _visit_related(self, parent_el): | ||
def _visit_related_and_verified_links(self, parent_el): | ||
# Create related identifiers element and gather relation pairs | ||
relation_pairs = set() | ||
for relation_iri, datacite_relation in RELATED_IDENTIFIER_TYPE_MAP.items(): | ||
for related_iri in self.basket[relation_iri]: | ||
relation_pairs.add((datacite_relation, related_iri)) | ||
|
||
related_identifiers_el = self.visit(parent_el, 'relatedIdentifiers', is_list=True) | ||
related_items_el = self.visit(parent_el, 'relatedItems', is_list=True) | ||
|
||
# First add regular related identifiers | ||
for datacite_relation, related_iri in sorted(relation_pairs): | ||
self._visit_related_identifier_and_item( | ||
related_identifiers_el, | ||
|
@@ -388,6 +392,20 @@ def _visit_related(self, parent_el): | |
datacite_relation, | ||
) | ||
|
||
# Then add verified links to same relatedIdentifiers element | ||
osf_item = self.basket.focus.dbmodel | ||
verified_links = getattr(osf_item, 'verified_resource_links', None) | ||
if verified_links: | ||
for link, resource_type in verified_links.items(): | ||
if link and isinstance(link, str) and smells_like_iri(link): | ||
self.visit(related_identifiers_el, 'relatedIdentifier', text=link, attrib={ | ||
'relatedIdentifierType': 'URL', | ||
'relationType': 'References', | ||
'resourceTypeGeneral': resource_type | ||
}) | ||
else: | ||
logger.warning('skipping non-URL verified link "%s"', link) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.

Turn this into a Sentry message. In addition, let's make it Pythonic:

    if not verified_links:
        return
    for ...:
        if <false condition>:
            # log sentry
            continue
        self.visit(...)
||
|
||
def _visit_name_identifiers(self, parent_el, agent_iri): | ||
for identifier in sorted(self.basket[agent_iri:DCTERMS.identifier]): | ||
identifier_type, identifier_value = self._identifier_type_and_value(identifier) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
# Generated by Django 4.2.15 on 2025-04-10 12:49 | ||
|
||
from django.db import migrations | ||
import osf.utils.datetime_aware_jsonfield | ||
|
||
|
||
class Migration(migrations.Migration):
    """Add the ``verified_resource_links`` JSON field to the ``abstractnode`` model.

    The field is declared with ``null=True`` and ``blank=True``, so it is optional.
    """

    # This migration is applied after the listed ``osf`` migration.
    dependencies = [('osf', '0028_collection_grade_levels_choices_and_more')]

    operations = [
        migrations.AddField(
            model_name='abstractnode',
            name='verified_resource_links',
            field=osf.utils.datetime_aware_jsonfield.DateTimeAwareJSONField(
                null=True,
                blank=True,
                encoder=osf.utils.datetime_aware_jsonfield.DateTimeAwareJSONEncoder,
            ),
        ),
    ]
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -294,6 +294,7 @@ class AbstractNode(DirtyFieldsMixin, TypedModel, AddonModelMixin, IdentifierMixi | |
'category', | ||
'is_public', | ||
'node_license', | ||
'verified_resource_links', | ||
] | ||
|
||
# Named constants | ||
|
@@ -387,6 +388,8 @@ class AbstractNode(DirtyFieldsMixin, TypedModel, AddonModelMixin, IdentifierMixi | |
|
||
schema_responses = GenericRelation('osf.SchemaResponse', related_query_name='nodes') | ||
|
||
verified_resource_links = DateTimeAwareJSONField(null=True, blank=True) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.

I renamed this field and updated its options in my PR:

    addon_verified_resource_links = DateTimeAwareJSONField(default=dict, blank=True)
||
|
||
class Meta: | ||
base_manager_name = 'objects' | ||
index_together = (('is_public', 'is_deleted', 'type')) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -17,3 +17,29 @@ def task__update_doi_metadata_on_change(self, target_guid): | |
@celery_app.task(ignore_results=True) | ||
def update_doi_metadata_on_change(target_guid): | ||
task__update_doi_metadata_on_change(target_guid) | ||
|
||
@celery_app.task(bind=True, max_retries=5, acks_late=True) | ||
def task__update_doi_metadata_with_verified_links(self, target_guid): | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.

The task is missing one important piece of data: the links and types sent from GV. The task should expect them to come in as arguments, and it should update the node with them. Otherwise, the identifier update and the DataCite tree walk will see an empty dict. In addition, I am wondering whether we can make the identifier update automatic whenever this field is saved or updated on the node.
||
sentry.log_message('Updating DOI with verified links for guid', | ||
extra_data={'guid': target_guid}, | ||
level=logging.INFO) | ||
|
||
Guid = apps.get_model('osf.Guid') | ||
target_object = Guid.load(target_guid).referent | ||
try: | ||
|
||
target_object.request_identifier_update(category='doi') | ||
|
||
sentry.log_message('DOI metadata with verified links updated for guid', | ||
extra_data={'guid': target_guid}, | ||
level=logging.INFO) | ||
except Exception as exc: | ||
sentry.log_message('Failed to update DOI metadata with verified links', | ||
extra_data={'guid': target_guid, 'error': str(exc)}, | ||
level=logging.ERROR) | ||
raise self.retry(exc=exc) | ||
|
||
@queued_task
@celery_app.task(ignore_result=True)
def update_doi_metadata_with_verified_links(target_guid):
    """Entry point for the verified-links DOI metadata update.

    Delegates to ``task__update_doi_metadata_with_verified_links`` with the
    same guid.

    The task is declared with ``ignore_result=True`` (singular), which is the
    option Celery recognizes for not storing a task's result; the plural
    spelling ``ignore_results`` is not a Celery task option.

    :param target_guid: guid of the object whose DOI metadata should be updated.
    """
    task__update_doi_metadata_with_verified_links(target_guid)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I guess this check is here to make sure the link looks like a link, which is good.
- Is `smells_like_iri()` precise enough (i.e. not too loose or too strict)?
- [...] `str` type, so we can skip it.