Skip to content

Feature/behave like node #1

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions posenet/base_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ def preprocess_input(self, image):
pass

def predict(self, image):
input_image, image_scale = self.preprocess_input(image)
input_image, image_scale, image_padding = self.preprocess_input(image)

input_image = tf.convert_to_tensor(input_image, dtype=tf.float32)

Expand All @@ -37,4 +37,4 @@ def predict(self, image):
displacement_fwd_result = result[self.output_tensor_names[self.DISPLACEMENT_FWD_KEY]]
displacement_bwd_result = result[self.output_tensor_names[self.DISPLACEMENT_BWD_KEY]]

return tf.sigmoid(heatmap_result), offsets_result, displacement_fwd_result, displacement_bwd_result, image_scale
return tf.sigmoid(heatmap_result), offsets_result, displacement_fwd_result, displacement_bwd_result, image_scale, image_padding
17 changes: 11 additions & 6 deletions posenet/decode.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@


def traverse_to_targ_keypoint(
edge_id, source_keypoint, target_keypoint_id, scores, offsets, output_stride, displacements
edge_id, source_keypoint, target_keypoint_id, scores, offsets, output_stride, displacements, offset_refine_step = 2
):
height = scores.shape[0]
width = scores.shape[1]
Expand All @@ -15,15 +15,20 @@ def traverse_to_targ_keypoint(
displaced_point = source_keypoint + displacements[
source_keypoint_indices[0], source_keypoint_indices[1], edge_id]

for i in range(0, offset_refine_step):
displaced_point_indices = np.clip(
np.round(displaced_point / output_stride), a_min=0, a_max=[height - 1, width - 1]).astype(np.int32)

displaced_point = displaced_point_indices * output_stride + offsets[
displaced_point_indices[0], displaced_point_indices[1], target_keypoint_id]

displaced_point_indices = np.clip(
np.round(displaced_point / output_stride), a_min=0, a_max=[height - 1, width - 1]).astype(np.int32)

score = scores[displaced_point_indices[0], displaced_point_indices[1], target_keypoint_id]

image_coord = displaced_point_indices * output_stride + offsets[
displaced_point_indices[0], displaced_point_indices[1], target_keypoint_id]
score = scores[displaced_point_indices[0],
displaced_point_indices[1], target_keypoint_id]

return score, image_coord
return score, displaced_point


def decode_pose(
Expand Down
12 changes: 11 additions & 1 deletion posenet/mobilenet.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,21 @@ def __init__(self, model_function, output_tensor_names, output_stride):

def preprocess_input(self, image):
    """Pad, resize and normalize a BGR image for MobileNet PoseNet inference.

    Args:
        image: H x W x 3 uint8 BGR image (OpenCV convention) -- assumed from
            the cv2 calls below; confirm against callers.

    Returns:
        A 3-tuple of:
          - input_img: 1 x target_height x target_width x 3 float32 NHWC
            tensor, RGB, values normalized to [-1, 1];
          - scale: np.array([scale_y, scale_x]) mapping network-input
            coordinates back to the *padded* image's pixel space;
          - padding: np.array([pad_y, pad_x]) -- pixels of constant border
            added on each side; callers subtract it (after applying `scale`)
            to recover original-image coordinates.
    """
    target_width, target_height = self.valid_resolution(image.shape[1], image.shape[0])

    # Pad symmetrically so the image matches the network's aspect ratio,
    # instead of letting the resize below distort it.
    target_aspect = target_width / target_height
    aspect = image.shape[1] / image.shape[0]
    if aspect < target_aspect:
        # Image is too narrow: pad left/right.
        padding = np.array([0, round(0.5 * (target_aspect * image.shape[0] - image.shape[1]))])
    else:
        # Image is too wide (or aspect matches): pad top/bottom.
        padding = np.array([round(0.5 * ((1.0 / target_aspect) * image.shape[1] - image.shape[0])), 0])
    image = cv2.copyMakeBorder(image, padding[0], padding[0], padding[1], padding[1],
                               cv2.BORDER_CONSTANT, value=[0, 0, 0])

    # The scale that can get us back to the original width and height;
    # note image.shape here is the shape *after* padding.
    scale = np.array([image.shape[0] / target_height, image.shape[1] / target_width])

    input_img = cv2.resize(image, (target_width, target_height), interpolation=cv2.INTER_LINEAR)
    input_img = cv2.cvtColor(input_img, cv2.COLOR_BGR2RGB).astype(np.float32)  # BGR -> RGB
    input_img = input_img * (2.0 / 255.0) - 1.0  # normalize to [-1, 1]
    input_img = input_img.reshape(1, target_height, target_width, 3)  # NHWC
    # NOTE(review): the rendered diff showed a stale `return input_img, scale`
    # immediately before this line (the pre-change return, which would make
    # this one unreachable); only the updated 3-tuple return is kept.
    return input_img, scale, padding
4 changes: 2 additions & 2 deletions posenet/posenet.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ def __init__(self, model: BaseModel, min_score=0.25):
self.min_score = min_score

def estimate_multiple_poses(self, image, max_pose_detections=10):
heatmap_result, offsets_result, displacement_fwd_result, displacement_bwd_result, image_scale = \
heatmap_result, offsets_result, displacement_fwd_result, displacement_bwd_result, image_scale, image_padding = \
self.model.predict(image)

pose_scores, keypoint_scores, keypoint_coords = posenet.decode_multiple_poses(
Expand All @@ -21,7 +21,7 @@ def estimate_multiple_poses(self, image, max_pose_detections=10):
max_pose_detections=max_pose_detections,
min_pose_score=self.min_score)

keypoint_coords *= image_scale
keypoint_coords = keypoint_coords * image_scale - image_padding

return pose_scores, keypoint_scores, keypoint_coords

Expand Down
12 changes: 11 additions & 1 deletion posenet/resnet.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,16 @@ def __init__(self, model_function, output_tensor_names, output_stride):

def preprocess_input(self, image):
target_width, target_height = self.valid_resolution(image.shape[1], image.shape[0])
# the padding to keep the aspect ratio:
target_aspect = target_width / target_height
aspect = image.shape[1] / image.shape[0]
if aspect < target_aspect:
padding = np.array([0, round(0.5 * (target_aspect * image.shape[0] - image.shape[1]))])
else:
padding = np.array([round(0.5 * ((1.0 / target_aspect) * image.shape[1] - image.shape[0])), 0])
image = cv2.copyMakeBorder(image, padding[0], padding[0], padding[1], padding[1],
cv2.BORDER_CONSTANT, value=[0,0,0])

# the scale that can get us back to the original width and height:
scale = np.array([image.shape[0] / target_height, image.shape[1] / target_width])
input_img = cv2.resize(image, (target_width, target_height), interpolation=cv2.INTER_LINEAR)
Expand All @@ -20,4 +30,4 @@ def preprocess_input(self, image):
# See: https://github.com/tensorflow/tfjs-models/blob/master/body-pix/src/resnet.ts
input_img = input_img + self.image_net_mean
input_img = input_img.reshape(1, target_height, target_width, 3) # HWC to NHWC
return input_img, scale
return input_img, scale, padding